sub attach_inbox {
my ($self, $ibx) = @_;
- my $key = $ibx->eidx_key;
- if (!$ibx->over || !$ibx->mm) {
- warn "W: skipping $key (unindexed)\n";
- return;
- }
- if (!defined($ibx->uidvalidity)) {
- warn "W: skipping $key (no UIDVALIDITY)\n";
- return;
+ my $ekey = $ibx->eidx_key;
+ my $misc = $self->{misc};
+ if ($misc && $misc->inbox_data($ibx)) { # all good if already indexed
+ } else {
+ if (!$ibx->over || !$ibx->mm) {
+ warn "W: skipping $ekey (unindexed)\n";
+ return;
+ }
+ if (!defined($ibx->uidvalidity)) {
+ warn "W: skipping $ekey (no UIDVALIDITY)\n";
+ return;
+ }
}
- $self->{ibx_map}->{$key} //= do {
+ $self->{ibx_map}->{$ekey} //= do {
push @{$self->{ibx_list}}, $ibx;
$ibx;
}
use File::Path ();
use PublicInbox::MiscSearch;
use PublicInbox::Config;
+my $json;
sub new {
my ($class, $eidx) = @_;
nodatacow_dir($mi_dir);
my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN;
$flags |= $PublicInbox::SearchIdx::DB_NO_SYNC if $eidx->{-no_fsync};
+ $json //= PublicInbox::Config::json();
bless {
mi_dir => $mi_dir,
flags => $flags,
$xdb->delete_document($_) for @drop; # just in case
my $doc = $PublicInbox::Search::X{Document}->new;
+ term_generator($self)->set_document($doc);
- # allow sorting by modified
+ # allow sorting by modified and uidvalidity (created at)
add_val($doc, $PublicInbox::MiscSearch::MODIFIED, $ibx->modified);
+ add_val($doc, $PublicInbox::MiscSearch::UIDVALIDITY, $ibx->uidvalidity);
- $doc->add_boolean_term('Q'.$eidx_key);
- $doc->add_boolean_term('T'.'inbox');
- term_generator($self)->set_document($doc);
+ $doc->add_boolean_term('Q'.$eidx_key); # uniQue id
+ $doc->add_boolean_term('T'.'inbox'); # Type
+
+ if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) {
+ $doc->add_boolean_term('T'.'newsgroup'); # additional Type
+ }
+
+ # force reread from disk, {description} could be loaded from {misc}
+ delete $ibx->{description};
+ my $desc = $ibx->description;
# description = S/Subject (or title)
# address = A/Author
- index_text($self, $ibx->description, 1, 'S');
+ index_text($self, $desc, 1, 'S');
+ index_text($self, $ibx->{name}, 1, 'XNAME');
my %map = (
address => 'A',
listid => 'XLISTID',
index_text($self, $v, 1, $pfx);
}
}
- index_text($self, $ibx->{name}, 1, 'XNAME');
my $data = {};
if (defined(my $max = $ibx->max_git_epoch)) { # v2
- my $desc = $ibx->description;
my $pfx = "/$ibx->{name}/git/";
for my $epoch (0..$max) {
my $git = $ibx->git_epoch($epoch) or return;
$ent->{git_dir} = $ibx->{inboxdir};
$data->{"/$ibx->{name}"} = $ent;
}
- $doc->set_data(PublicInbox::Config::json()->encode($data));
+ $doc->set_data($json->encode($data));
if (defined $docid) {
$xdb->replace_document($docid, $doc);
} else {
package PublicInbox::MiscSearch;
use strict;
use v5.10.1;
-use PublicInbox::Search qw(retry_reopen);
+use PublicInbox::Search qw(retry_reopen int_val);
+my $json;
# Xapian value columns:
our $MODIFIED = 0;
+our $UIDVALIDITY = 1; # (created time)
# avoid conflicting with message Search::prob_prefix for UI/UX reasons
my %PROB_PREFIX = (
sub new {
my ($class, $dir) = @_;
PublicInbox::Search::load_xapian();
+ $json //= PublicInbox::Config::json();
bless {
xdb => $PublicInbox::Search::X{Database}->new($dir)
}, $class;
sub ibx_data_once {
my ($self, $ibx) = @_;
my $xdb = $self->{xdb};
- my $eidx_key = $ibx->eidx_key; # may be {inboxdir}, so private
- my $head = $xdb->postlist_begin('Q'.$eidx_key);
- my $tail = $xdb->postlist_end('Q'.$eidx_key);
+ my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private
+ my $head = $xdb->postlist_begin($term);
+ my $tail = $xdb->postlist_end($term);
if ($head != $tail) {
my $doc = $xdb->get_document($head->get_docid);
+ $ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
+ $ibx->{-modified} = int_val($doc, $MODIFIED);
$doc->get_data;
} else {
undef;
retry_reopen($self, \&ibx_data_once, $ibx);
}
+sub ibx_cache_load { # fill $cache->{$eidx_key} from one Xapian $doc
+ my ($doc, $cache) = @_;
+ my $end = $doc->termlist_end;
+ my $cur = $doc->termlist_begin;
+ $cur->skip_to('Q'); # NOTE(review): presumably lands on the first term >= 'Q' -- confirm
+ return if $cur == $end; # iterator ran off the end: nothing usable in this document
+ my $eidx_key = $cur->get_termname;
+ $eidx_key =~ s/\AQ// or return; # expired
+ my $ce = $cache->{$eidx_key} = {}; # fresh entry, replaces any earlier one for this key
+ $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
+ $ce->{-modified} = int_val($doc, $MODIFIED);
+ $ce->{description} = do {
+ # extract description from manifest.js.gz epoch description
+ my $d;
+ my $data = $json->decode($doc->get_data);
+ for (values %$data) {
+ $d = $_->{description} // next; # no description here: $d keeps its previous value
+ $d =~ s/ \[epoch [0-9]+\]\z// or next; # keep looking unless an " [epoch N]" suffix was stripped
+ last; # suffix stripped: $d now holds the bare description
+ }
+ $d; # last defined description seen (suffix stripped if one matched); undef if none
+ }
+}
+
+sub _nntpd_cache_load { # retry_reopen callback, returns the filled cache hashref
+ my ($self) = @_;
+ my $opt = { limit => $self->{xdb}->get_doccount * 10, relevance => -1 }; # NOTE(review): 10x doccount presumably acts as "no practical limit" -- confirm
+ my $mset = mset($self, 'type:newsgroup type:inbox', $opt);
+ my $cache = {};
+ for my $it ($mset->items) {
+ ibx_cache_load($it->get_document, $cache); # stores this document's entry in $cache (skipped documents add nothing)
+ }
+ $cache
+}
+
+# returns { newsgroup => $cache_entry } mapping (keyed by the eidx_key from
+# each document's 'Q' term); $cache_entry holds anything which may trigger
+# seeks at startup, currently: description, -modified, and uidvalidity.
+sub nntpd_cache_load {
+ my ($self) = @_;
+ retry_reopen($self, \&_nntpd_cache_load); # the actual load happens in the callback
+}
+
1;
my ($self, $sig) = @_;
my $pi_cfg = $sig ? PublicInbox::Config->new : $self->{pi_cfg};
my $groups = $pi_cfg->{-by_newsgroup}; # filled during each_inbox
+ my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {};
$pi_cfg->each_inbox(sub {
my ($ibx) = @_;
my $ngname = $ibx->{newsgroup} // return;
- if ($ibx->nntp_usable) {
+ my $ce = $cache->{$ngname};
+ if (($ce and (%$ibx = (%$ibx, %$ce))) || $ibx->nntp_usable) {
# only valid if msgmap and over works
# preload to avoid fragmentation:
$ibx->description;
package PublicInbox::Search;
use strict;
use parent qw(Exporter);
-our @EXPORT_OK = qw(retry_reopen);
+our @EXPORT_OK = qw(retry_reopen int_val);
use List::Util qw(max);
# values for searching, changing the numeric value breaks
1 : Search::Xapian::ENQ_ASCENDING();
*sortable_serialise = $x.'::sortable_serialise';
+ *sortable_unserialise = $x.'::sortable_unserialise';
# n.b. FLAG_PURE_NOT is expensive not suitable for a public
# website as it could become a denial-of-service vector
# FLAG_PHRASE also seems to cause performance problems chert
\@ret;
}
+sub int_val ($$) { # numeric value stored in value column $col of $doc
+ my ($doc, $col) = @_;
+ my $val = $doc->get_value($col) or return; # undefined is '' in Xapian
+ sortable_unserialise($val) + 0; # PV => IV conversion
+}
+
1;
}
eval 'require '.$X->{WritableDatabase} or die;
*sortable_serialise = $xap.'::sortable_serialise';
- *sortable_unserialise = $xap.'::sortable_unserialise';
$DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
$DB_OPEN = eval($xap.'::DB_OPEN()');
my $ver = (eval($xap.'::major_version()') << 16) |
$self->{xdb}->replace_document($docid, $doc);
}
-sub int_val ($$) {
- my ($doc, $col) = @_;
- my $val = $doc->get_value($col) or return; # undefined is '' in Xapian
- sortable_unserialise($val) + 0; # PV => IV conversion
-}
-
sub smsg_from_doc ($) {
my ($doc) = @_;
my $data = $doc->get_data or return;
like($smsg->{to}, qr/\blist\@example\.com\b/, 'to appears');
my $doc = $m->get_document;
my $col = PublicInbox::Search::BYTES();
- my $bytes = PublicInbox::SearchIdx::int_val($doc, $col);
+ my $bytes = PublicInbox::Search::int_val($doc, $col);
like($bytes, qr/\A[0-9]+\z/, '$bytes stored as digit');
ok($bytes > 0, '$bytes is > 0');
is($bytes, $smsg->{bytes}, 'bytes Xapian value matches Over');
$col = PublicInbox::Search::UID();
- my $uid = PublicInbox::SearchIdx::int_val($doc, $col);
+ my $uid = PublicInbox::Search::int_val($doc, $col);
is($uid, $smsg->{num}, 'UID column matches {num}');
is($uid, $m->get_docid, 'UID column matches docid');
}