From 721f7add0cf5ac6e6247483628e985742c09e45f Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 27 Nov 2020 09:52:48 +0000
Subject: [PATCH] miscsearch: implement ->newsgroup_matches

This may be used to speed up newsgroup searches down-the-line,
but the grep perlop isn't too shabby, at the moment.
---
 Reviewer notes (text here is ignored by git-am):
 - ibx_matches_once: also stop on an empty result page.  With an
   empty $by_newsgroup, $limit is 0 and "$nr < $limit" alone can
   never end the loop.
 - The head of the heredoc warning was mangled in the copy under
   review; the "BUG: docid=" prefix is a reconstruction, TODO confirm
   against the original posting.
 - t/extsearch.t: is_deeply() now receives (got, expected) in the
   order Test::More documents.
 - Blob IDs on the "index" lines are those of the original posting
   and do not reflect the changes above.

 lib/PublicInbox/MiscSearch.pm | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 t/extsearch.t                 |  4 ++++
 2 files changed, 51 insertions(+)

diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index 48ef6914..f2e31443 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -76,6 +76,53 @@ sub mset {
 	retry_reopen($self, \&misc_enquire_once, $qr, $opt);
 }
 
+# Pages through all results of $qr and returns a hashref containing
+# the $by_newsgroup entries whose key is the eidx_key (`Q' term) of
+# a matching document.
+sub ibx_matches_once { # retry_reopen callback
+	my ($self, $qr, $by_newsgroup) = @_;
+	# double in case no newsgroups are configured:
+	my $limit = scalar(keys %$by_newsgroup) * 2;
+	my $opt = { limit => $limit, offset => 0, relevance => -1 };
+	my $ret = {}; # newsgroup => $ibx of matches
+	while (1) {
+		my $mset = misc_enquire_once($self, $qr, $opt);
+		for my $mi ($mset->items) {
+			my $doc = $mi->get_document;
+			my $end = $doc->termlist_end;
+			my $cur = $doc->termlist_begin;
+			$cur->skip_to('Q');
+			if ($cur != $end) {
+				my $ng = $cur->get_termname; # eidx_key
+				$ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
+				if (my $ibx = $by_newsgroup->{$ng}) {
+					$ret->{$ng} = $ibx;
+				}
+			} else {
+				warn <<EOF;
+BUG: docid=${\$mi->get_docid} has no `Q' (eidx_key) term
+EOF
+			}
+		}
+		my $nr = $mset->size;
+		# A short page is the last one.  Test for an empty page
+		# explicitly: $nr < $limit is never true when $limit is 0
+		# (empty $by_newsgroup) and we would loop forever.
+		return $ret if $nr < $limit || $nr == 0;
+		$opt->{offset} += $nr;
+	}
+}
+
+# returns a newsgroup => PublicInbox::Inbox mapping of the inboxes
+# matching query string $qs, taken from $pi_cfg->{-by_newsgroup}
+sub newsgroup_matches {
+	my ($self, $qs, $pi_cfg) = @_;
+	my $qp = $self->{qp} //= mi_qp_new($self);
+	$qs .= ' type:inbox';
+	my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
+	retry_reopen($self, \&ibx_matches_once, $qr, $pi_cfg->{-by_newsgroup});
+}
+
 sub ibx_data_once {
 	my ($self, $ibx) = @_;
 	my $xdb = $self->{xdb};
diff --git a/t/extsearch.t b/t/extsearch.t
index 0045294b..85cdf74a 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -79,5 +79,9 @@ my @it = $misc->mset('')->items;
 is(scalar(@it), 2, 'two inboxes');
 like($it[0]->get_document->get_data, qr/v2test/, 'docdata matched v2');
 like($it[1]->get_document->get_data, qr/v1test/, 'docdata matched v1');
+my $pi_cfg = PublicInbox::Config->new;
+$pi_cfg->fill_all;
+my $ret = $misc->newsgroup_matches('', $pi_cfg);
+is_deeply($ret, $pi_cfg->{-by_newsgroup}, '->newsgroup_matches');
 
 done_testing;
-- 
2.48.1