package PublicInbox::Search;
use strict;
use parent qw(Exporter);
-our @EXPORT_OK = qw(mdocid);
+our @EXPORT_OK = qw(retry_reopen);
+use List::Util qw(max);
# values for searching, changing the numeric value breaks
# compatibility with old indices (so don't change them)
use PublicInbox::Smsg;
use PublicInbox::Over;
-my $QP_FLAGS;
-our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem);
+our $QP_FLAGS;
+our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query);
our $Xap; # 'Search::Xapian' or 'Xapian'
-my $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
-my $ENQ_ASCENDING;
+our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
+our $ENQ_ASCENDING;
sub load_xapian () {
return 1 if defined $Xap;
$ENQ_ASCENDING = $x eq 'Xapian' ?
1 : Search::Xapian::ENQ_ASCENDING();
- # for Smsg:
- *PublicInbox::Smsg::sortable_unserialise =
- $Xap.'::sortable_unserialise';
+ *sortable_serialise = $x.'::sortable_serialise';
# n.b. FLAG_PURE_NOT is expensive and not suitable for a public
# website as it could become a denial-of-service vector
# FLAG_PHRASE also seems to cause performance problems chert
}
}
-sub _xdb ($) {
# Open every numbered shard directory under $self->{xpfx} and combine them
# into a single database handle.
#
# Side effects on $self: FLAG_PHRASE() is OR-ed into {qp_flags} when no
# shard directory contains an "iamchert" file, and {nshard} is set to the
# number of shards opened.
#
# Returns the handle of shard 0 with all other shards attached through
# add_database.  Returns nothing when the directory cannot be opened, when
# it holds no all-digit entries, or when any shard in 0..max is missing or
# unreadable (a warning is emitted in that last case).
+sub xdb_sharded {
+ my ($self) = @_;
+ opendir(my $dh, $self->{xpfx}) or return; # not initialized yet
+
+ # We need numeric sorting so shard[0] is first for reading
+ # Xapian metadata, if needed
+ my $last = max(grep(/\A[0-9]+\z/, readdir($dh)));
+ return if !defined($last); # no all-digit directory entries found
+ my (@xdb, $slow_phrase);
+ for (0..$last) { # every index up to the highest one seen must be present
+ my $shard_dir = "$self->{xpfx}/$_";
+ if (-d $shard_dir && -r _) { # "_" reuses the stat result of the -d test
+ push @xdb, $X{Database}->new($shard_dir);
+ $slow_phrase ||= -f "$shard_dir/iamchert";
+ } else { # gaps from missing epochs throw off mdocid()
+ warn "E: $shard_dir missing or unreadable\n";
+ return;
+ }
+ }
+ $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
+ $self->{nshard} = scalar(@xdb);
+ my $xdb = shift @xdb; # shard 0 becomes the handle the others are added to
+ $xdb->add_database($_) for @xdb;
+ $xdb;
+}
+
# Open the Xapian database for $self and return the handle.
# {qp_flags} is initialised from $QP_FLAGS when it is not yet defined.
# For {ibx_ver} >= 2 the work is delegated to xdb_sharded; otherwise the
# single database in the directory returned by xdir($self, 1) is opened.
# The sub has no explicit return: its value is that of the last statement
# evaluated in whichever branch runs.
+sub _xdb {
my ($self) = @_;
my $dir = xdir($self, 1);
- my ($xdb, $slow_phrase);
- my $qpf = \($self->{qp_flags} ||= $QP_FLAGS);
+ $self->{qp_flags} //= $QP_FLAGS;
if ($self->{ibx_ver} >= 2) {
- my @xdb;
- opendir(my $dh, $dir) or return; # not initialized yet
-
- # We need numeric sorting so shard[0] is first for reading
- # Xapian metadata, if needed
- for (sort { $a <=> $b } grep(/\A[0-9]+\z/, readdir($dh))) {
- my $shard_dir = "$dir/$_";
- if (-d $shard_dir && -r _) {
- push @xdb, $X{Database}->new($shard_dir);
- $slow_phrase ||= -f "$shard_dir/iamchert";
- } else { # gaps from missing epochs throw off mdocid()
- warn "E: $shard_dir missing or unreadable\n";
- return;
- }
- }
- $self->{nshard} = scalar(@xdb);
- $xdb = shift @xdb;
- $xdb->add_database($_) for @xdb;
+ xdb_sharded($self); # also updates {qp_flags} and {nshard} on $self
} else {
- $slow_phrase = -f "$dir/iamchert";
- $xdb = $X{Database}->new($dir);
+ $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert";
+ $X{Database}->new($dir); # unsharded layout: one database directly in $dir
}
- $$qpf |= FLAG_PHRASE() unless $slow_phrase;
- $xdb;
}
# v2 Xapian docids don't conflict, so they're identical to
sub xdb ($) {
my ($self) = @_;
# Return the database handle cached in $self->{xdb}, creating it on
# first use.  load_xapian() is called before _xdb so the Xapian
# bindings are loaded before a database is opened.
- $self->{xdb} ||= do {
+ $self->{xdb} //= do {
load_xapian();
- _xdb($self);
+ $self->_xdb; # method call: _xdb is looked up through $self's class
};
}
ibx_ver => $ibx->version,
}, $class;
xpfx_init($self);
- my $dir = xdir($self, 1);
- $self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3");
$self;
}
}
# read-only
# Parse $query_string and run it, returning whatever _do_enquire returns.
#   $query_string - text handed to the query parser's parse_query
#   $opts         - optional hashref; a fresh one is used when it is false.
#                   "relevance" is set to 1 unless that key already exists.
# The query parser is created once by qparse_new and cached in $self->{qp}.
-sub query {
+sub mset {
my ($self, $query_string, $opts) = @_;
$opts ||= {};
- if ($query_string eq '' && !$opts->{mset}) {
- $self->{over_ro}->recent($opts);
- } else {
- my $qp = $self->{qp} //= qparse_new($self);
- my $qp_flags = $self->{qp_flags};
- my $query = $qp->parse_query($query_string, $qp_flags);
- $opts->{relevance} = 1 unless exists $opts->{relevance};
- _do_enquire($self, $query, $opts);
- }
+ my $qp = $self->{qp} //= qparse_new($self);
+ my $query = $qp->parse_query($query_string, $self->{qp_flags});
+ $opts->{relevance} = 1 unless exists $opts->{relevance};
+ _do_enquire($self, $query, $opts);
}
sub retry_reopen {
- my ($self, $cb, $arg) = @_;
+ my ($self, $cb, @arg) = @_;
for my $i (1..10) {
if (wantarray) {
my @ret;
- eval { @ret = $cb->($arg) };
+ eval { @ret = $cb->($self, @arg) };
return @ret unless $@;
} else {
my $ret;
- eval { $ret = $cb->($arg) };
+ eval { $ret = $cb->($self, @arg) };
return $ret unless $@;
}
# Exception: The revision being read has been discarded -
sub _do_enquire {
my ($self, $query, $opts) = @_;
# Run _enquire_once through retry_reopen.  retry_reopen invokes the
# callback as $cb->($self, @arg), so $query and $opts arrive as the
# second and third arguments of _enquire_once.
- retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]);
+ retry_reopen($self, \&_enquire_once, $query, $opts);
}
# returns true if all docs have the THREADID value
}
# Build one enquiry for $query against xdb($self) and return the result
# of its get_mset call.  Keys read from $opts: eidx_key, uid_range,
# thread, offset, limit.
sub _enquire_once { # retry_reopen callback
- my ($self, $query, $opts) = @{$_[0]};
+ my ($self, $query, $opts) = @_;
my $xdb = xdb($self);
# eidx_key: wrap $query in an OP_FILTER query with the term 'O'.$eidx_key
+ if (defined(my $eidx_key = $opts->{eidx_key})) {
+ $query = $X{Query}->new(OP_FILTER(), $query, 'O'.$eidx_key);
+ }
# uid_range: array ref whose elements [0] and [1] are serialised and used
# as the endpoints of an OP_VALUE_RANGE query on the UID value, which is
# then applied to $query through OP_FILTER
+ if (defined(my $uid_range = $opts->{uid_range})) {
+ my $range = $X{Query}->new(OP_VALUE_RANGE(), UID,
+ sortable_serialise($uid_range->[0]),
+ sortable_serialise($uid_range->[1]));
+ $query = $X{Query}->new(OP_FILTER(), $query, $range);
+ }
my $enquire = $X{Enquire}->new($xdb);
$enquire->set_query($query);
$opts ||= {}; # NOTE(review): $opts has already been dereferenced above
if ($opts->{thread} && has_threadid($self)) {
$enquire->set_collapse_key(THREADID);
}
+ $enquire->get_mset($opts->{offset} || 0, $opts->{limit} || 50); # false offset => 0, false limit => 50
+}
- my $offset = $opts->{offset} || 0;
- my $limit = $opts->{limit} || 50;
- my $mset = $enquire->get_mset($offset, $limit);
- return $mset if $opts->{mset};
# Look up the entries for the items of $mset through $ibx->over->get_all
# and return them in the same order as the items appear in $mset.
# In list context returns ($mset->get_matches_estimated, \@msgs);
# otherwise returns \@msgs only.
+sub mset_to_smsg {
+ my ($self, $ibx, $mset) = @_;
my $nshard = $self->{nshard} // 1; # falls back to 1 when {nshard} is undefined
my $i = 0;
# maps each mdocid() result to its 1-based position among $mset->items
my %order = map { mdocid($nshard, $_) => ++$i } $mset->items;
# the {num} field of each fetched entry is the key into %order
my @msgs = sort {
$order{$a->{num}} <=> $order{$b->{num}}
- } @{$self->{over_ro}->get_all(keys %order)};
+ } @{$ibx->over->get_all(keys %order)};
wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs;
}