1 # Copyright (C) 2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # read-only counterpart to MiscIdx
5 package PublicInbox::MiscSearch;
8 use PublicInbox::Search qw(retry_reopen);
10 # Xapian value columns:
13 # avoid conflicting with message Search::prob_prefix for UI/UX reasons
# Each entry maps a query field name to one or more Xapian term prefixes
# (space-separated).  The `each %PROB_PREFIX' loop in the query parser
# setup splits the value on spaces and calls add_prefix() once per prefix.
15 description => 'S', # $INBOX_DIR/description
19 infourl => 'XINFOURL',
# The empty field name is registered against every prefix listed here.
# NOTE(review): presumably this makes unprefixed query terms match all of
# these fields -- confirm against the Xapian QueryParser add_prefix docs.
21 '' => 'S A XLISTID XNAME XURL XINFOURL'
# $class: invocant; $dir: path handed to the Xapian Database constructor
25 my ($class, $dir) = @_;
# called before %PublicInbox::Search::X is dereferenced below
# NOTE(review): presumably this is what populates %X -- confirm in Search.pm
26 PublicInbox::Search::load_xapian();
# Xapian database handle opened on $dir, stored under the `xdb' key
28 xdb => $PublicInbox::Search::X{Database}->new($dir)
# Configure a Xapian QueryParser bound to $self->{xdb}.
35 my $xdb = $self->{xdb};
36 my $qp = $PublicInbox::Search::X{QueryParser}->new;
# terms are combined with AND unless the query says otherwise
37 $qp->set_default_op(PublicInbox::Search::OP_AND());
38 $qp->set_database($xdb);
# stemmer and stemming strategy both come from PublicInbox::Search
39 $qp->set_stemmer(PublicInbox::Search::stemmer($self));
40 $qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME());
# The method name differs between Xapian versions; ->can() yields a code
# ref for whichever one this binding provides.
41 my $cb = $qp->can('set_max_wildcard_expansion') //
42 $qp->can('set_max_expansion'); # Xapian 1.5.0+
# same version-dependent lookup for the range processor registration method
44 $cb = $qp->can('add_valuerangeprocessor') //
45 $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
# register every field name in %PROB_PREFIX, once per space-separated prefix
46 while (my ($name, $prefix) = each %PROB_PREFIX) {
47 $qp->add_prefix($name, $_) for split(/ /, $prefix);
# `type:' is a boolean (filter) prefix mapped to the `T' term prefix
49 $qp->add_boolean_prefix('type', 'T');
# Run one Xapian enquiry for the parsed query $qr against $self->{xdb}.
#
# Keys read from the $opt hashref:
#   asc       - when true, the value sort is not reversed ($desc is false)
#   relevance - defaults to 0 when undefined; -1 selects ascending docid
#               order with BoolWeight; other values pick one of the two
#               $MODIFIED-based sorts below
#   offset    - first result to fetch; defaults to 0
#   limit     - maximum number of results; a false value (including 0)
#               falls back to 200 because `||' is used
#
# The last statement is the get_mset() call, so its result is what the
# caller receives.
53 sub misc_enquire_once { # retry_reopen callback
54 my ($self, $qr, $opt) = @_;
55 my $eq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
57 my $desc = !$opt->{asc};
58 my $rel = $opt->{relevance} // 0;
59 if ($rel == -1) { # ORDER BY docid/UID
60 $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
61 $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
63 $eq->set_sort_by_relevance_then_value($MODIFIED, $desc);
65 $eq->set_sort_by_value_then_relevance($MODIFIED, $desc);
67 $eq->get_mset($opt->{offset} || 0, $opt->{limit} || 200);
# $qs: query string; $opt: hashref of options passed on to misc_enquire_once
71 my ($self, $qs, $opt) = @_;
# build the query parser on first use and cache it in $self->{qp}
73 my $qp = $self->{qp} //= mi_qp_new($self);
# an empty query string is replaced with the `type:inbox' filter
74 $qs = 'type:inbox' if $qs eq '';
75 my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
# relevance is set to 1 only when the caller left the key out entirely;
# note this writes into the hash $opt refers to
76 $opt->{relevance} = 1 unless exists $opt->{relevance};
# misc_enquire_once is invoked through retry_reopen with ($qr, $opt)
77 retry_reopen($self, \&misc_enquire_once, $qr, $opt);
# Collect the inboxes from $by_newsgroup whose documents match $qr.
#
# $by_newsgroup is a hashref keyed by newsgroup name.  Results are fetched
# through misc_enquire_once in docid order (relevance => -1), using a page
# size of twice the number of keys in %$by_newsgroup.  For each match the
# document's term list is inspected, the leading `Q' is stripped from the
# term to obtain the eidx_key, and that key is looked up in $by_newsgroup.
# A warning is emitted for a term lacking the `Q' prefix and for a
# document without a `Q' term.
#
# Returns $ret (described below as newsgroup => $ibx) once a page comes
# back with fewer than $limit results; otherwise the offset is advanced
# by the page size.
#
# NOTE(review): when %$by_newsgroup is empty, $limit is 0.
# misc_enquire_once then falls back to its own default limit of 200, and
# `$nr < $limit' can never be true -- confirm that paging still terminates
# in that case.
80 sub ibx_matches_once { # retry_reopen callback
81 my ($self, $qr, $by_newsgroup) = @_;
82 # double in case no newsgroups are configured:
83 my $limit = scalar(keys %$by_newsgroup) * 2;
84 my $opt = { limit => $limit, offset => 0, relevance => -1 };
85 my $ret = {}; # newsgroup => $ibx of matches
87 my $mset = misc_enquire_once($self, $qr, $opt);
88 for my $mi ($mset->items) {
89 my $doc = $mi->get_document;
90 my $end = $doc->termlist_end;
91 my $cur = $doc->termlist_begin;
94 my $ng = $cur->get_termname; # eidx_key
95 $ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
96 if (my $ibx = $by_newsgroup->{$ng}) {
101 W: docid=${\$mi->get_docid} has no `Q' (eidx_key) term
105 my $nr = $mset->size;
106 return $ret if $nr < $limit;
107 $opt->{offset} += $nr;
111 # returns a newsgroup => PublicInbox::Inbox mapping
# $qs: query string; $pi_cfg: config object whose {-by_newsgroup} hashref
# is handed to ibx_matches_once
112 sub newsgroup_matches {
113 my ($self, $qs, $pi_cfg) = @_;
# build the query parser on first use and cache it in $self->{qp}
114 my $qp = $self->{qp} //= mi_qp_new($self);
# add the `type:inbox' filter to the local copy of the query string
115 $qs .= ' type:inbox';
116 my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
# ibx_matches_once is invoked through retry_reopen with the parsed query
117 retry_reopen($self, \&ibx_matches_once, $qr, $pi_cfg->{-by_newsgroup});
# $ibx: inbox object; only its eidx_key method is used here
121 my ($self, $ibx) = @_;
122 my $xdb = $self->{xdb};
123 my $eidx_key = $ibx->eidx_key; # may be {inboxdir}, so private
# posting list iterators for the term `Q' . $eidx_key
124 my $head = $xdb->postlist_begin('Q'.$eidx_key);
125 my $tail = $xdb->postlist_end('Q'.$eidx_key);
# begin != end means at least one document carries the term;
# load the document for the first posting
126 if ($head != $tail) {
127 my $doc = $xdb->get_document($head->get_docid);
135 my ($self, $ibx) = @_;
# ibx_data_once is invoked through retry_reopen with $ibx as its argument
# NOTE(review): presumably retry_reopen re-runs the callback after
# reopening the database when Xapian reports it was modified -- confirm in
# PublicInbox::Search
136 retry_reopen($self, \&ibx_data_once, $ibx);