use strict;
use parent qw(Exporter);
our @EXPORT_OK = qw(mdocid);
+use List::Util qw(max);
# values for searching, changing the numeric value breaks
# compatibility with old indices (so don't change them)
# added for public-inbox 1.6.0+
BYTES => 3, # IMAP RFC822.SIZE
UID => 4, # IMAP UID == NNTP article number == Xapian docid
+ THREADID => 5, # RFC 8474, RFC 8621
# TODO
- # THREADID => ?
# REPLYCNT => ?, # IMAP ANSWERED
# SCHEMA_VERSION history
# public-inbox v1.5.0 adds (still SCHEMA_VERSION=15):
# * "lid:" and "l:" for List-Id searches
#
- # v1.6.0 adds BYTES and UID values
+ # v1.6.0 adds BYTES, UID and THREADID values
SCHEMA_VERSION => 15,
};
use PublicInbox::Smsg;
use PublicInbox::Over;
my $QP_FLAGS;
-our %X = map { $_ => 0 } qw(BoolWeight Database Enquire
- NumberValueRangeProcessor QueryParser Stem);
+our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem);
our $Xap; # 'Search::Xapian' or 'Xapian'
+my $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
my $ENQ_ASCENDING;
sub load_xapian () {
return 1 if defined $Xap;
- for my $x (qw(Search::Xapian Xapian)) {
+ # n.b. PI_XAPIAN is intended for development use only. We still
+ # favor Search::Xapian since that's what's available in current
+ # Debian stable (10.x) and derived distros.
+ for my $x (($ENV{PI_XAPIAN} // 'Search::Xapian'), 'Xapian') {
eval "require $x";
next if $@;
$x->import(qw(:standard));
$Xap = $x;
+
+ # `version_string' was added in Xapian 1.1
+ my $xver = eval('v'.eval($x.'::version_string()')) //
+ eval('v'.eval($x.'::xapian_version_string()'));
+
+ # NumberRangeProcessor was added in Xapian 1.3.6,
+ # NumberValueRangeProcessor was removed for 1.5.0+,
+ # favor the older /Value/ variant since that's what our
+ # (currently) preferred Search::Xapian supports
+ $NVRP = $x.'::'.($x eq 'Xapian' && $xver ge v1.5 ?
+ 'NumberRangeProcessor' : 'NumberValueRangeProcessor');
$X{$_} = $Xap.'::'.$_ for (keys %X);
# ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm,
# We need numeric sorting so shard[0] is first for reading
# Xapian metadata, if needed
- for (sort { $a <=> $b } grep(/\A[0-9]+\z/, readdir($dh))) {
+ my $last = max(grep(/\A[0-9]+\z/, readdir($dh)));
+ return if !defined($last);
+ for (0..$last) {
my $shard_dir = "$dir/$_";
if (-d $shard_dir && -r _) {
push @xdb, $X{Database}->new($shard_dir);
int(($docid - 1) / $nshard) + 1;
}
+sub mset_to_artnums { # ($self, $mset) -> array ref holding mdocid() of every item in $mset
+ my ($self, $mset) = @_;
+ my $nshard = $self->{nshard} // 1; # fall back to 1 when $self->{nshard} is undef
+ [ map { mdocid($nshard, $_) } $mset->items ]; # implicit return; same order as $mset->items (presumably article numbers, going by the sub name)
+}
+
sub xdb ($) {
my ($self) = @_;
$self->{xdb} ||= do {
ibx_ver => $ibx->version,
}, $class;
xpfx_init($self);
- my $dir = xdir($self, 1);
- $self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3");
$self;
}
}
# read-only
-sub query {
+sub mset { # ($self, $query_string, $opts) -> whatever _do_enquire() returns for the parsed query
my ($self, $query_string, $opts) = @_;
$opts ||= {}; # $opts is optional; note a caller-supplied hash is modified in place below
- if ($query_string eq '' && !$opts->{mset}) {
- $self->{over_ro}->recent($opts);
- } else {
- my $qp = $self->{qp} //= qparse_new($self);
- my $qp_flags = $self->{qp_flags};
- my $query = $qp->parse_query($query_string, $qp_flags);
- $opts->{relevance} = 1 unless exists $opts->{relevance};
- _do_enquire($self, $query, $opts);
- }
+ my $qp = $self->{qp} //= qparse_new($self); # build the query parser once and cache it in $self->{qp}
+ my $query = $qp->parse_query($query_string, $self->{qp_flags});
+ $opts->{relevance} = 1 unless exists $opts->{relevance}; # only filled in when the caller left the key out entirely
+ _do_enquire($self, $query, $opts);
}
sub retry_reopen {
retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]);
}
+# returns true if all docs have the THREADID value, as signalled by the 'has_threadid' metadata key of xdb($self) being exactly '1'
+sub has_threadid ($) {
+ my ($self) = @_;
+ (xdb($self)->get_metadata('has_threadid') // '') eq '1'; # an undef metadata value is compared as '' and so yields false
+}
+
sub _enquire_once { # retry_reopen callback
my ($self, $query, $opts) = @{$_[0]};
my $xdb = xdb($self);
} else {
$enquire->set_sort_by_value_then_relevance(TS, $desc);
}
- my $offset = $opts->{offset} || 0;
- my $limit = $opts->{limit} || 50;
- my $mset = $enquire->get_mset($offset, $limit);
- return $mset if $opts->{mset};
+
+ # `mairix -t / --threads' or JMAP collapseThreads
+ if ($opts->{thread} && has_threadid($self)) {
+ $enquire->set_collapse_key(THREADID);
+ }
+ $enquire->get_mset($opts->{offset} || 0, $opts->{limit} || 50);
+}
+
+sub mset_to_smsg { # ($self, $ibx, $mset) -> records from $ibx->over for the items of $mset, in $mset order
+ my ($self, $ibx, $mset) = @_;
my $nshard = $self->{nshard} // 1; # fall back to 1 when $self->{nshard} is undef
my $i = 0;
my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; # mdocid() result => 1-based position within $mset->items
my @msgs = sort { # put the get_all() results back into $mset order, keyed on each record's {num}
$order{$a->{num}} <=> $order{$b->{num}}
- } @{$self->{over_ro}->get_all(keys %order)};
+ } @{$ibx->over->get_all(keys %order)};
wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs; # list context: (estimated match count, \@msgs); otherwise just \@msgs
}
$qp->set_database($xdb);
$qp->set_stemmer(stemmer($self));
$qp->set_stemming_strategy(STEM_SOME());
- $qp->set_max_wildcard_expansion(100);
- my $nvrp = $X{NumberValueRangeProcessor};
- $qp->add_valuerangeprocessor($nvrp->new(YYYYMMDD, 'd:'));
- $qp->add_valuerangeprocessor($nvrp->new(DT, 'dt:'));
+ my $cb = $qp->can('set_max_wildcard_expansion') //
+ $qp->can('set_max_expansion'); # Xapian 1.5.0+
+ $cb->($qp, 100);
+ $cb = $qp->can('add_valuerangeprocessor') //
+ $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
+ $cb->($qp, $NVRP->new(YYYYMMDD, 'd:'));
+ $cb->($qp, $NVRP->new(DT, 'dt:'));
# for IMAP, undocumented for WWW and may be split off or go away
- $qp->add_valuerangeprocessor($nvrp->new(BYTES, 'bytes:'));
- $qp->add_valuerangeprocessor($nvrp->new(TS, 'ts:'));
- $qp->add_valuerangeprocessor($nvrp->new(UID, 'uid:'));
+ $cb->($qp, $NVRP->new(BYTES, 'bytes:'));
+ $cb->($qp, $NVRP->new(TS, 'ts:'));
+ $cb->($qp, $NVRP->new(UID, 'uid:'));
while (my ($name, $prefix) = each %bool_pfx_external) {
$qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix);