From: Eric Wong (Contractor, The Linux Foundation)
Date: Mon, 26 Feb 2018 23:41:11 +0000 (+0000)
Subject: searchidx: index values in the threader
X-Git-Tag: v1.1.0-pre1~212
X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=e63db5a1a563fb18db53fe1b8349dc52f325bd8b

searchidx: index values in the threader

We will need timestamp, YYYYMMDD, article number, and line count
for querying thread information (including XOVER for NNTP).
---

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 00b24d68..b5d43d12 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -141,18 +141,20 @@ sub add_val ($$$) {
 	$doc->add_value($col, $num);
 }
 
-sub add_values ($$$$) {
-	my ($smsg, $bytes, $num, $lines) = @_;
+sub add_values ($$) {
+	my ($doc, $values) = @_;
 
-	my $ts = $smsg->ts;
-	my $doc = $smsg->{doc};
-	add_val($doc, &PublicInbox::Search::TS, $ts);
+	my $ts = $values->[PublicInbox::Search::TS];
+	add_val($doc, PublicInbox::Search::TS, $ts);
 
-	defined($num) and add_val($doc, &PublicInbox::Search::NUM, $num);
+	my $num = $values->[PublicInbox::Search::NUM];
+	defined($num) and add_val($doc, PublicInbox::Search::NUM, $num);
 
-	defined($bytes) and add_val($doc, &PublicInbox::Search::BYTES, $bytes);
+	my $bytes = $values->[PublicInbox::Search::BYTES];
+	defined($bytes) and add_val($doc, PublicInbox::Search::BYTES, $bytes);
 
-	add_val($doc, &PublicInbox::Search::LINES, $lines);
+	my $lines = $values->[PublicInbox::Search::LINES];
+	add_val($doc, PublicInbox::Search::LINES, $lines);
 
 	my $yyyymmdd = strftime('%Y%m%d', gmtime($ts));
 	add_val($doc, PublicInbox::Search::YYYYMMDD, $yyyymmdd);
@@ -307,7 +309,8 @@ sub add_message {
 		}
 
 		my $lines = $mime->body_raw =~ tr!\n!\n!;
-		add_values($smsg, $bytes, $num, $lines);
+		my @values = ($smsg->ts, $num, $bytes, $lines);
+		add_values($doc, \@values);
 
 		my $tg = $self->term_generator;
 
@@ -360,7 +363,8 @@ sub add_message {
 		my $refs = parse_references($smsg);
 		my $data = $smsg->to_doc_data($blob);
 		if ($threader) {
-			$threader->thread_msg($mid, $smsg->ts, $xpath, $data);
+			push @values, $mid, $xpath, $data;
+			$threader->thread_msg(\@values);
 		} else {
 			link_message($self, $smsg, $refs, $old_tid);
 		}
diff --git a/lib/PublicInbox/SearchIdxThread.pm b/lib/PublicInbox/SearchIdxThread.pm
index 57bb293f..6b50eb00 100644
--- a/lib/PublicInbox/SearchIdxThread.pm
+++ b/lib/PublicInbox/SearchIdxThread.pm
@@ -61,30 +61,34 @@ sub thread_worker_loop {
 				$xdb->begin_transaction;
 				$txn = 1;
 			}
-			eval { $self->thread_msg_real(@$msg) };
-			warn "failed to index message <$msg->[0]>: $@\n" if $@;
+			eval { $self->thread_msg_real($msg) };
+			warn "failed to index message <$msg->[-1]>: $@\n" if $@;
 		}
 	}
 }
 
 # called by a partition worker
 sub thread_msg {
-	my ($self, $mid, $ts, $xpath, $doc_data) = @_;
+	my ($self, $values) = @_;
 	my $w = $self->{w};
 	my $err;
-	my $str = freeze([ $mid, $ts, $xpath, $doc_data ]);
-	my $len = length($str) . "\n";
+	my $str = freeze($values);
+	$str = length($str) . "\n" . $str;
 
 	# multiple processes write to the same pipe, so use flock
 	$self->_lock_acquire;
-	print $w $len, $str or $err = $!;
+	print $w $str or $err = $!;
 	$self->_lock_release;
 
 	die "print failed: $err\n" if $err;
 }
 
 sub thread_msg_real {
-	my ($self, $mid, $ts, $xpath, $doc_data) = @_;
+	my ($self, $values) = @_;
+	my $doc_data = pop @$values;
+	my $xpath = pop @$values;
+	my $mid = pop @$values;
+	my $ts = $values->[PublicInbox::Search::TS];
 	my $smsg = $self->lookup_message($mid);
 	my ($old_tid, $doc_id);
 	if ($smsg) {
@@ -99,6 +103,7 @@ sub thread_msg_real {
 	my $doc = $smsg->{doc};
 	$doc->add_term('XPATH' . $xpath) if defined $xpath;
 	$doc->add_term('XMID' . $mid);
+	PublicInbox::SearchIdx::add_values($doc, $values);
 	$doc->set_data($doc_data);
 	$smsg->{ts} = $ts;
 	$smsg->load_from_data($doc_data);