lib/PublicInbox/LeiOverview.pm | 8 +++----- lib/PublicInbox/LeiSearch.pm | 16 +++------------- lib/PublicInbox/LeiXSearch.pm | 14 ++++++++++---- lib/PublicInbox/Search.pm | 20 +++++++++++++++++++- diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index 8799f1ccec56fcd5c06f96bf54507f4608934692..47d9eb318f821daea650b39ef15dc5c4c0d8d805 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -224,9 +224,8 @@ $self->{git} = $git; my $git_dir = $git->{git_dir}; sub { my ($smsg, $mitem) = @_; - my $kw = []; # TODO get from mitem $l2m->wq_do('write_mail', \@io, $git_dir, - $smsg->{blob}, $lei_ipc, $kw) + $smsg->{blob}, $lei_ipc, $smsg->{kw}); } } elsif ($l2m) { my $wcb = $l2m->write_cb($lei); @@ -235,8 +234,8 @@ $self->{git} = $git; # for ovv_atexit_child my $g2m = $l2m->can('git_to_mail'); sub { my ($smsg, $mitem) = @_; - my $kw = []; # TODO get from mitem - $git->cat_async($smsg->{blob}, $g2m, [ $wcb, $kw ]); + $git->cat_async($smsg->{blob}, $g2m, + [ $wcb, $smsg->{kw} ]); }; } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) { my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},"; @@ -266,7 +265,6 @@ my $ORS = $self->{fmt} eq 'json' ? ",\n" : "\n"; # JSONL $lei->{ovv_buf} = \(my $buf = ''); sub { my ($smsg, $mitem) = @_; - delete @$smsg{qw(tid num)}; $buf .= $json->encode(_unbless_smsg(@_)) . $ORS; if (length($buf) > 65536) { my $lk = $self->lock_for_scope; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index b7e337de38d7b870d1045ba417de28fedb1f010a..440bacf527e60037d1854017cb529eba96570419 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -5,7 +5,7 @@ package PublicInbox::LeiSearch; use strict; use v5.10.1; use parent qw(PublicInbox::ExtSearch); -use PublicInbox::Search; +use PublicInbox::Search qw(xap_terms); # get combined docid from over.num: # (not generic Xapian, only works with our sharding scheme) @@ -19,19 +19,9 @@ sub msg_keywords { my ($self, $num) = @_; # num_or_mitem my $xdb = $self->xdb; # set {nshard}; my $docid = ref($num) ? $num->get_docid : num2docid($self, $num); - my %kw; - eval { - my $end = $xdb->termlist_end($docid); - my $cur = $xdb->termlist_begin($docid); - for (; $cur != $end; $cur++) { - $cur->skip_to('K'); - last if $cur == $end; - my $kw = $cur->get_termname; - $kw =~ s/\AK//s and $kw{$kw} = undef; - } - }; + my $kw = xap_terms('K', $xdb, $docid); warn "E: #$docid ($num): $@\n" if $@; - wantarray ? sort(keys(%kw)) : \%kw; + wantarray ? sort(keys(%$kw)) : $kw; } 1; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index a6d827de9c30b444fc3dea1d392172fea1749038..d7688ede9c343eeb1188d993000872299697941c 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -13,6 +13,7 @@ use PublicInbox::OpPipe; use PublicInbox::Import; use File::Temp 0.19 (); # 0.19 for ->newdir use File::Spec (); +use PublicInbox::Search qw(xap_terms); sub new { my ($class) = @_; @@ -74,7 +75,12 @@ my $nshard = $self->{nshard}; my $docid = $mitem->get_docid; my $shard = ($docid - 1) % $nshard; my $num = int(($docid - 1) / $nshard) + 1; - my $smsg = $self->{shard2ibx}->[$shard]->over->get_art($num); + my $ibx = $self->{shard2ibx}->[$shard]; + my $smsg = $ibx->over->get_art($num); + if (ref($ibx->can('msg_keywords'))) { + my $kw = xap_terms('K', $mitem->get_document); + $smsg->{kw} = [ sort keys %$kw ]; + } $smsg->{docid} = $docid; $smsg; } @@ -153,11 +159,11 @@ my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing'; $dedupe->prepare_dedupe; do { $mset = $self->mset($mo->{qstr}, $mo); - for my $it ($mset->items) { - my $smsg = smsg_for($self, $it) or next; + for my $mitem ($mset->items) { + my $smsg = smsg_for($self, $mitem) or next; wait_startq($startq) if $startq; next if $dedupe->is_smsg_dup($smsg); - $each_smsg->($smsg, $it); + $each_smsg->($smsg, $mitem); } } while (_mset_more($mset, $mo)); undef $each_smsg; # drops @io for l2m->{each_smsg_done} diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index a4b40f94976b9c8897a322e032637d115d6dd923..7c6a16bec9a1fa87e57c304cf2b1c7a4d635cec0 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -6,7 +6,7 @@ # Read-only search interface for use by the web and NNTP interfaces package PublicInbox::Search; use strict; use parent qw(Exporter); -our @EXPORT_OK = qw(retry_reopen int_val get_pct); +our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms); use List::Util qw(max); # values for searching, changing the numeric value breaks @@ -430,6 +430,24 @@ # thread skeleton view. says the value isn't # very meaningful, anyways. my $n = $_[0]->get_percent; $n > 99 ? 99 : $n; +} + +sub xap_terms ($$;@) { + my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty () + my %ret; + eval { + my $end = $xdb_or_doc->termlist_end(@docid); + my $cur = $xdb_or_doc->termlist_begin(@docid); + for (; $cur != $end; $cur++) { + $cur->skip_to($pfx); + last if $cur == $end; + my $tn = $cur->get_termname; + if (index($tn, $pfx) == 0) { + $ret{substr($tn, length($pfx))} = undef; + } + } + }; + \%ret; } 1;