1 # Copyright (C) 2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # read-only counterpart to MiscIdx
5 package PublicInbox::MiscSearch;
8 use PublicInbox::Search qw(retry_reopen);
10 # Xapian value columns:
13 # avoid conflicting with message Search::prob_prefix for UI/UX reasons
15 description => 'S', # $INBOX_DIR/description
19 infourl => 'XINFOURL',
21 '' => 'S A XLISTID XNAME XURL XINFOURL'
25 my ($class, $dir) = @_;
27 xdb => $PublicInbox::Search::X{Database}->new($dir)
34 my $xdb = $self->{xdb};
35 my $qp = $PublicInbox::Search::X{QueryParser}->new;
36 $qp->set_default_op(PublicInbox::Search::OP_AND());
37 $qp->set_database($xdb);
38 $qp->set_stemmer(PublicInbox::Search::stemmer($self));
39 $qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME());
40 my $cb = $qp->can('set_max_wildcard_expansion') //
41 $qp->can('set_max_expansion'); # Xapian 1.5.0+
43 $cb = $qp->can('add_valuerangeprocessor') //
44 $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
45 while (my ($name, $prefix) = each %PROB_PREFIX) {
46 $qp->add_prefix($name, $_) for split(/ /, $prefix);
48 $qp->add_boolean_prefix('type', 'T');
# Build an Enquire object (class taken from %PublicInbox::Search::X) on
# $self->{xdb}, configure its ordering from $opt, and fetch one page of
# matches with get_mset; callers in this file use the result as an mset.
#   $self - object holding the database handle in {xdb}
#   $qr   - parsed query (callers here pass the result of parse_query)
#   $opt  - hashref; keys read here: asc, relevance, offset, limit
52 sub misc_enquire_once { # retry_reopen callback
53 my ($self, $qr, $opt) = @_;
54 my $eq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
# $desc is true unless the caller set {asc}
56 my $desc = !$opt->{asc};
# an undefined {relevance} is treated as 0
57 my $rel = $opt->{relevance} // 0;
58 if ($rel == -1) { # ORDER BY docid/UID
59 $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
60 $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
62 $eq->set_sort_by_relevance_then_value($MODIFIED, $desc);
64 $eq->set_sort_by_value_then_relevance($MODIFIED, $desc);
# `||' (not `//'): a false offset becomes 0 and a false limit becomes 200
66 $eq->get_mset($opt->{offset} || 0, $opt->{limit} || 200);
70 my ($self, $qs, $opt) = @_;
72 my $qp = $self->{qp} //= mi_qp_new($self);
73 $qs = 'type:inbox' if $qs eq '';
74 my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
75 $opt->{relevance} = 1 unless exists $opt->{relevance};
76 retry_reopen($self, \&misc_enquire_once, $qr, $opt);
# Page through the matches for $qr via misc_enquire_once (docid order,
# relevance => -1) and look each match's key up in %$by_newsgroup.
# Returns $ret once a page comes back with fewer than $limit results.
#   $self         - passed straight through to misc_enquire_once
#   $qr           - parsed query
#   $by_newsgroup - hashref keyed by the names matched against below
79 sub ibx_matches_once { # retry_reopen callback
80 my ($self, $qr, $by_newsgroup) = @_;
81 # double in case no newsgroups are configured:
82 my $limit = scalar(keys %$by_newsgroup) * 2;
# NOTE(review): if %$by_newsgroup is empty, $limit is 0; misc_enquire_once
# then falls back to a limit of 200 and `$nr < $limit' below can never be
# true -- confirm the paging still terminates in that case.
83 my $opt = { limit => $limit, offset => 0, relevance => -1 };
84 my $ret = {}; # newsgroup => $ibx of matches
86 my $mset = misc_enquire_once($self, $qr, $opt);
87 for my $mi ($mset->items) {
88 my $doc = $mi->get_document;
89 my $end = $doc->termlist_end;
90 my $cur = $doc->termlist_begin;
# the term is expected to carry a `Q' prefix; it is stripped to obtain the
# key used for the %$by_newsgroup lookup (a warning is issued if absent)
93 my $ng = $cur->get_termname; # eidx_key
94 $ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
95 if (my $ibx = $by_newsgroup->{$ng}) {
100 W: docid=${\$mi->get_docid} has no `Q' (eidx_key) term
104 my $nr = $mset->size;
# a short page means there is nothing further to fetch
105 return $ret if $nr < $limit;
# otherwise advance the offset past the results just seen
106 $opt->{offset} += $nr;
110 # returns a newsgroup => PublicInbox::Inbox mapping
# Appends ` type:inbox' to the query string $qs, parses it with the query
# parser cached in $self->{qp} (built by mi_qp_new on first use), and runs
# ibx_matches_once through retry_reopen with $pi_cfg->{-by_newsgroup}.
#   $self   - object whose {qp} slot caches the query parser
#   $qs     - query string; must be defined since it is appended to
#   $pi_cfg - config object/hash providing the {-by_newsgroup} hashref
111 sub newsgroup_matches {
112 my ($self, $qs, $pi_cfg) = @_;
# `//=' builds the parser only when $self->{qp} is not yet defined
113 my $qp = $self->{qp} //= mi_qp_new($self);
114 $qs .= ' type:inbox';
115 my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
116 retry_reopen($self, \&ibx_matches_once, $qr, $pi_cfg->{-by_newsgroup});
120 my ($self, $ibx) = @_;
121 my $xdb = $self->{xdb};
122 my $eidx_key = $ibx->eidx_key; # may be {inboxdir}, so private
123 my $head = $xdb->postlist_begin('Q'.$eidx_key);
124 my $tail = $xdb->postlist_end('Q'.$eidx_key);
125 if ($head != $tail) {
126 my $doc = $xdb->get_document($head->get_docid);
134 my ($self, $ibx) = @_;
135 retry_reopen($self, \&ibx_data_once, $ibx);