1 # Copyright (C) 2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # read-only counterpart to MiscIdx
5 package PublicInbox::MiscSearch;
8 use PublicInbox::Search qw(retry_reopen int_val);
11 # Xapian value columns:
13 our $UIDVALIDITY = 1; # (created time)
15 # avoid conflicting with message Search::prob_prefix for UI/UX reasons
17 description => 'S', # $INBOX_DIR/description
21 infourl => 'XINFOURL',
23 '' => 'S A XLISTID XNAME XURL XINFOURL'
27 my ($class, $dir) = @_;
28 PublicInbox::Search::load_xapian();
29 $json //= PublicInbox::Config::json();
31 xdb => $PublicInbox::Search::X{Database}->new($dir)
# NOTE(review): the sub header is not visible in this excerpt; presumably
# this is the body of mi_qp_new, which other code in this file calls to
# fill $self->{qp} -- confirm.
# Builds a Xapian QueryParser bound to $self->{xdb}, with OP_AND as the
# default operator, the stemmer returned by PublicInbox::Search::stemmer
# and the STEM_SOME stemming strategy.
38 my $xdb = $self->{xdb};
39 my $qp = $PublicInbox::Search::X{QueryParser}->new;
40 $qp->set_default_op(PublicInbox::Search::OP_AND());
41 $qp->set_database($xdb);
42 $qp->set_stemmer(PublicInbox::Search::stemmer($self));
43 $qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME());
# The method name depends on the Xapian version, so look up whichever of
# the two names this QueryParser object provides.
44 my $cb = $qp->can('set_max_wildcard_expansion') //
45 $qp->can('set_max_expansion'); # Xapian 1.5.0+
# Same version-dependent lookup for the range processor registration.
47 $cb = $qp->can('add_valuerangeprocessor') //
48 $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
# Each %PROB_PREFIX value is a space-separated list of term prefixes;
# every one of them is registered under the same field name.
49 while (my ($name, $prefix) = each %PROB_PREFIX) {
50 $qp->add_prefix($name, $_) for split(/ /, $prefix);
# The type field is registered as a boolean prefix mapped to term prefix T.
52 $qp->add_boolean_prefix('type', 'T');
# Creates one Xapian Enquire object on $self->{xdb}, configures its
# ordering from $opt and returns the result of get_mset.
# Keys read from the $opt hashref:
#   asc       - when false or absent, $desc is true; $desc is passed as
#               the second argument of the set_sort_by_* calls
#   relevance - defaults to 0 when undefined; -1 selects ascending docid
#               order with a BoolWeight weighting scheme
#   offset    - first argument of get_mset; any false value becomes 0
#   limit     - second argument of get_mset; any false value, including
#               an explicit 0, becomes 200
# NOTE(review): several lines of this sub are not visible in this excerpt,
# including where $qr is attached to the Enquire object and the conditions
# that choose between the two set_sort_by_* calls.
56 sub misc_enquire_once { # retry_reopen callback
57 my ($self, $qr, $opt) = @_;
58 my $eq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
60 my $desc = !$opt->{asc};
61 my $rel = $opt->{relevance} // 0;
62 if ($rel == -1) { # ORDER BY docid/UID
63 $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
64 $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
# The two calls below differ only in which key is primary: relevance
# first, or the $MODIFIED value column first.
66 $eq->set_sort_by_relevance_then_value($MODIFIED, $desc);
68 $eq->set_sort_by_value_then_relevance($MODIFIED, $desc);
70 $eq->get_mset($opt->{offset} || 0, $opt->{limit} || 200);
# NOTE(review): the sub header is not visible in this excerpt; presumably
# this is the body of mset, which _nntpd_cache_load calls with the same
# ($self, query string, options) argument order -- confirm.
# Parses the query string $qs and runs misc_enquire_once through
# retry_reopen with the parsed query and $opt.
74 my ($self, $qs, $opt) = @_;
# The query parser is created on first use and cached in $self->{qp}.
77 my $qp = $self->{qp} //= mi_qp_new($self);
# An empty query string is replaced by a type:inbox query.
78 $qs = 'type:inbox' if $qs eq '';
79 my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
# relevance is set to 1 only when the key is absent from $opt; an
# existing value, even an undefined one, is left alone.
80 $opt->{relevance} = 1 unless exists $opt->{relevance};
81 retry_reopen($self, \&misc_enquire_once, $qr, $opt);
# Fetches results of $qr through misc_enquire_once in docid order
# (relevance => -1) and, for each matching document, reads a term name,
# removes its leading Q and looks the remainder up in $by_newsgroup.
# Returns $ret once a page holds fewer than $limit results; otherwise the
# offset is advanced by the page size.
# NOTE(review): the loop header, the positioning of the term iterator and
# the statement that fills $ret are not visible in this excerpt;
# presumably matched inboxes are stored in $ret keyed by newsgroup.
# NOTE(review): $limit is 0 when $by_newsgroup has no keys.  In that case
# misc_enquire_once falls back to 200 results per page, while the size
# check near the end of this sub still compares against 0 and can never
# be true -- confirm the enclosing loop terminates for an empty mapping.
84 sub ibx_matches_once { # retry_reopen callback
85 my ($self, $qr, $by_newsgroup) = @_;
86 # double in case no newsgroups are configured:
87 my $limit = scalar(keys %$by_newsgroup) * 2;
88 my $opt = { limit => $limit, offset => 0, relevance => -1 };
89 my $ret = {}; # newsgroup => $ibx of matches
91 my $mset = misc_enquire_once($self, $qr, $opt);
92 for my $mi ($mset->items) {
93 my $doc = $mi->get_document;
94 my $end = $doc->termlist_end;
95 my $cur = $doc->termlist_begin;
98 my $ng = $cur->get_termname; # eidx_key
99 $ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
100 if (my $ibx = $by_newsgroup->{$ng}) {
105 W: docid=${\$mi->get_docid} has no `Q' (eidx_key) term
109 my $nr = $mset->size;
110 return $ret if $nr < $limit;
111 $opt->{offset} += $nr;
115 # returns a newsgroup => PublicInbox::Inbox mapping
# Arguments: $qs is a query string and $pi_cfg is a hashref whose
# -by_newsgroup entry is handed to ibx_matches_once.
# A type:inbox term is appended to $qs before parsing, so it is combined
# with whatever the caller supplied.
116 sub newsgroup_matches {
117 my ($self, $qs, $pi_cfg) = @_;
# The query parser is created on first use and cached in $self->{qp}.
118 my $qp = $self->{qp} //= mi_qp_new($self);
119 $qs .= ' type:inbox';
120 my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
121 retry_reopen($self, \&ibx_matches_once, $qr, $pi_cfg->{-by_newsgroup});
# NOTE(review): the sub header is not visible in this excerpt; presumably
# this is the body of ibx_data_once, which is passed to retry_reopen
# elsewhere in this file together with $ibx -- confirm.
# Looks up the posting list of the term made of Q followed by the
# eidx_key of $ibx and, when it is not empty, copies two value columns of
# the first posted document onto $ibx.
125 my ($self, $ibx) = @_;
126 my $xdb = $self->{xdb};
127 my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private
128 my $head = $xdb->postlist_begin($term);
129 my $tail = $xdb->postlist_end($term);
# begin differing from end means at least one document carries the term.
130 if ($head != $tail) {
131 my $doc = $xdb->get_document($head->get_docid);
# uidvalidity is only filled in when not already defined, whereas
# -modified is overwritten on every call.
132 $ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
133 $ibx->{-modified} = int_val($doc, $MODIFIED);
141 my ($self, $ibx) = @_;
142 retry_reopen($self, \&ibx_data_once, $ibx);
# NOTE(review): the sub header is not visible in this excerpt; presumably
# this is the body of ibx_cache_load, which _nntpd_cache_load calls with
# a document and $cache -- confirm.
# Creates a fresh entry in $cache for one document, keyed by the term
# name with its leading Q removed, and fills in uidvalidity, -modified
# and description.
146 my ($doc, $cache) = @_;
147 my $end = $doc->termlist_end;
148 my $cur = $doc->termlist_begin;
# Nothing is cached when the term iterator is already at its end, or when
# the current term does not start with Q.
150 return if $cur == $end;
151 my $eidx_key = $cur->get_termname;
152 $eidx_key =~ s/\AQ// or return; # expired
# Any previous entry under the same key is replaced by a new empty hash.
153 my $ce = $cache->{$eidx_key} = {};
154 $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
155 $ce->{-modified} = int_val($doc, $MODIFIED);
156 $ce->{description} = do {
157 # extract description from manifest.js.gz epoch description
# The document data is decoded with $json; values lacking a description,
# or whose description does not end in an [epoch N] marker, are skipped.
# When the marker is present it is stripped from $d.
159 my $data = $json->decode($doc->get_data);
160 for (values %$data) {
161 $d = $_->{description} // next;
162 $d =~ s/ \[epoch [0-9]+\]\z// or next;
# Queries for type:newsgroup type:inbox in docid order (relevance => -1)
# and passes every matching document to ibx_cache_load along with $cache.
# The result limit is ten times the document count of $self->{xdb}.
# NOTE(review): the declarations of $self and $cache and the value this
# sub returns are not visible in this excerpt.
169 sub _nntpd_cache_load { # retry_reopen callback
171 my $opt = { limit => $self->{xdb}->get_doccount * 10, relevance => -1 };
172 my $mset = mset($self, 'type:newsgroup type:inbox', $opt);
174 for my $it ($mset->items) {
175 ibx_cache_load($it->get_document, $cache);
180 # returns { newsgroup => $cache_entry } mapping, $cache_entry contains
181 # anything which may trigger seeks at startup, currently: description,
182 # -modified, and uidvalidity.
# Wrapper which runs _nntpd_cache_load through retry_reopen.
# NOTE(review): the line that unpacks $self is not visible in this excerpt.
183 sub nntpd_cache_load {
185 retry_reopen($self, \&_nntpd_cache_load);
# Glob assignment: reopen in this package is an alias for
# PublicInbox::Search::reopen, so both names run the same code.
189 *reopen = \&PublicInbox::Search::reopen;