-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# read-only counterpart to MiscIdx
package PublicInbox::MiscSearch;
use strict;
use v5.10.1;
-use PublicInbox::Search qw(retry_reopen);
+use PublicInbox::Search qw(retry_reopen int_val xap_terms);
+my $json;
# Xapian value columns:
our $MODIFIED = 0;
+our $UIDVALIDITY = 1; # (created time)
+our $ART_MIN = 2; # NNTP article number
+our $ART_MAX = 3; # NNTP article number
# avoid conflicting with message Search::prob_prefix for UI/UX reasons
my %PROB_PREFIX = (
sub new {
my ($class, $dir) = @_;
PublicInbox::Search::load_xapian();
+ $json //= PublicInbox::Config::json();
bless {
xdb => $PublicInbox::Search::X{Database}->new($dir)
}, $class;
$eq->set_query($qr);
my $desc = !$opt->{asc};
my $rel = $opt->{relevance} // 0;
- if ($rel == -1) { # ORDER BY docid/UID
+ if ($rel == -1) { # ORDER BY docid
$eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
$eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
} elsif ($rel) {
sub mset {
my ($self, $qs, $opt) = @_;
$opt ||= {};
+ reopen($self);
my $qp = $self->{qp} //= mi_qp_new($self);
$qs = 'type:inbox' if $qs eq '';
my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
retry_reopen($self, \&misc_enquire_once, $qr, $opt);
}
-sub ibx_matches_once { # retry_reopen callback
- my ($self, $qr, $by_newsgroup) = @_;
- # double in case no newsgroups are configured:
- my $limit = scalar(keys %$by_newsgroup) * 2;
- my $opt = { limit => $limit, offset => 0, relevance => -1 };
- my $ret = {}; # newsgroup => $ibx of matches
- while (1) {
- my $mset = misc_enquire_once($self, $qr, $opt);
- for my $mi ($mset->items) {
- my $doc = $mi->get_document;
- my $end = $doc->termlist_end;
- my $cur = $doc->termlist_begin;
- $cur->skip_to('Q');
- if ($cur != $end) {
- my $ng = $cur->get_termname; # eidx_key
- $ng =~ s/\AQ// or warn "BUG: no `Q': $ng";
- if (my $ibx = $by_newsgroup->{$ng}) {
- $ret->{$ng} = $ibx;
- }
- } else {
- warn <<EOF;
-W: docid=${\$mi->get_docid} has no `Q' (eidx_key) term
-EOF
- }
- }
- my $nr = $mset->size;
- return $ret if $nr < $limit;
- $opt->{offset} += $nr;
- }
-}
-
-# returns a newsgroup => PublicInbox::Inbox mapping
-sub newsgroup_matches {
- my ($self, $qs, $pi_cfg) = @_;
- my $qp = $self->{qp} //= mi_qp_new($self);
- $qs .= ' type:inbox';
- my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
- retry_reopen($self, \&ibx_matches_once, $qr, $pi_cfg->{-by_newsgroup});
-}
-
sub ibx_data_once {
my ($self, $ibx) = @_;
my $xdb = $self->{xdb};
- my $eidx_key = $ibx->eidx_key; # may be {inboxdir}, so private
- my $head = $xdb->postlist_begin('Q'.$eidx_key);
- my $tail = $xdb->postlist_end('Q'.$eidx_key);
- if ($head != $tail) {
- my $doc = $xdb->get_document($head->get_docid);
- $doc->get_data;
- } else {
- undef;
+ my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private
+ my $head = $xdb->postlist_begin($term);
+ my $tail = $xdb->postlist_end($term);
+ return if $head == $tail;
+ my $doc = $xdb->get_document($head->get_docid);
+ $ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
+ $ibx->{-modified} = int_val($doc, $MODIFIED);
+ $ibx->{-art_min} = int_val($doc, $ART_MIN);
+ $ibx->{-art_max} = int_val($doc, $ART_MAX);
+ $doc->get_data;
+}
+
+sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
+ my ($doc) = $_[-1];
+ my $d;
+ my $data = $json->decode($doc->get_data);
+ for (values %$data) {
+ $d = $_->{description} // next;
+ $d =~ s/ \[epoch [0-9]+\]\z// or next;
+ last;
}
+ {
+ uidvalidity => int_val($doc, $UIDVALIDITY),
+ -modified => int_val($doc, $MODIFIED),
+ -art_min => int_val($doc, $ART_MIN), # may be undef
+ -art_max => int_val($doc, $ART_MAX), # may be undef
+ # extract description from manifest.js.gz epoch description
+ description => $d
+ };
}
sub inbox_data {
retry_reopen($self, \&ibx_data_once, $ibx);
}
+sub ibx_cache_load {
+ my ($doc, $cache) = @_;
+ my ($eidx_key) = xap_terms('Q', $doc);
+ return unless defined($eidx_key); # expired
+ $cache->{$eidx_key} = doc2ibx_cache_ent($doc);
+}
+
+sub _nntpd_cache_load { # retry_reopen callback
+ my ($self) = @_;
+ my $opt = { limit => $self->{xdb}->get_doccount * 10, relevance => -1 };
+ my $mset = mset($self, 'type:newsgroup type:inbox', $opt);
+ my $cache = {};
+ for my $it ($mset->items) {
+ ibx_cache_load($it->get_document, $cache);
+ }
+ $cache
+}
+
+# returns { newsgroup => $cache_entry } mapping, $cache_entry contains
+# anything which may trigger seeks at startup, currently: description,
+# -modified, and uidvalidity.
+sub nntpd_cache_load {
+ my ($self) = @_;
+ retry_reopen($self, \&_nntpd_cache_load);
+}
+
+no warnings 'once';
+*reopen = \&PublicInbox::Search::reopen;
+
1;