while (my $mod = shift @mods) {
if (my $groups = $mod_groups{$mod}) {
push @mods, @$groups;
+ } elsif ($mod eq 'Search::Xapian') {
+ require PublicInbox::Search;
+ PublicInbox::Search::load_xapian() or
+ $err->{'Search::Xapian || Xapian'} = $@;
} else {
eval "require $mod";
$err->{$mod} = $@ if $@;
use PublicInbox::MID qw/id_compress/;
use PublicInbox::Over;
my $QP_FLAGS;
+our %X = map { $_ => 0 } qw(BoolWeight Database Enquire
+ NumberValueRangeProcessor QueryParser Stem);
+our $Xap; # 'Search::Xapian' or 'Xapian'
+my $ENQ_ASCENDING;
+
+# Load the first usable Xapian Perl binding, trying Search::Xapian (XS)
+# before the SWIG-generated Xapian, and initialize the package state
+# derived from it: $Xap (name of the binding), %X (fully-qualified
+# class names), $ENQ_ASCENDING and $QP_FLAGS.  Also aliases
+# PublicInbox::SearchMsg::sortable_unserialise to the binding's version.
+#
+# Returns 1 when a binding is loaded (including when an earlier call
+# already loaded one) and undef when neither can be required; in that
+# case $@ still holds the error from the last require attempt.
sub load_xapian () {
- $QP_FLAGS ||= eval {
- require Search::Xapian;
- Search::Xapian->import(qw(:standard));
+ return 1 if defined $Xap;
+ for my $x (qw(Search::Xapian Xapian)) {
+ eval "require $x";
+ next if $@;
+
+ $x->import(qw(:standard));
+ $Xap = $x;
+ $X{$_} = $Xap.'::'.$_ for (keys %X);
+ # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm,
+ # so let's hope this part of the ABI is stable because it's
+ # just an integer:
+ $ENQ_ASCENDING = $x eq 'Xapian' ?
+ 1 : Search::Xapian::ENQ_ASCENDING();
+
+ # for SearchMsg:
+ *PublicInbox::SearchMsg::sortable_unserialise =
+ $Xap.'::sortable_unserialise';
# n.b. FLAG_PURE_NOT is expensive not suitable for a public
# website as it could become a denial-of-service vector
# FLAG_PHRASE also seems to cause performance problems chert
# (and probably earlier Xapian DBs). glass seems fine...
# TODO: make this an option, maybe?
# or make indexlevel=medium as default
- FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD();
- };
-};
+ $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() |
+ FLAG_WILDCARD();
+ # report success explicitly: leaving the loop with "last" would
+ # fall through to the failure value below on the first call
+ return 1;
+ }
+ # neither binding could be loaded
+ undef;
+}
# This is English-only, everything else is non-standard and may be confused as
# a prefix common in patch emails
if ($self->{version} >= 2) {
foreach my $shard (<$dir/*>) {
-d $shard && $shard =~ m!/[0-9]+\z! or next;
- my $sub = Search::Xapian::Database->new($shard);
+ my $sub = $X{Database}->new($shard);
if ($xdb) {
$xdb->add_database($sub);
} else {
}
} else {
$slow_phrase = -f "$dir/iamchert";
- $xdb = Search::Xapian::Database->new($dir);
+ $xdb = $X{Database}->new($dir);
}
$$qpf |= FLAG_PHRASE() unless $slow_phrase;
$xdb;
}
# Exception: The revision being read has been discarded -
# you should call Xapian::Database::reopen()
- if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') {
+ if (ref($@) =~ /\bDatabaseModifiedError\b/) {
warn "reopen try #$i on $@\n";
reopen($self);
} else {
sub _enquire_once {
my ($self, $query, $opts) = @_;
my $xdb = xdb($self);
- my $enquire = Search::Xapian::Enquire->new($xdb);
+ my $enquire = $X{Enquire}->new($xdb);
$enquire->set_query($query);
$opts ||= {};
my $desc = !$opts->{asc};
if (($opts->{mset} || 0) == 2) {
- $enquire->set_docid_order(Search::Xapian::ENQ_ASCENDING());
- $enquire->set_weighting_scheme(Search::Xapian::BoolWeight->new);
+ $enquire->set_docid_order($ENQ_ASCENDING);
+ $enquire->set_weighting_scheme($X{BoolWeight}->new);
} elsif ($opts->{relevance}) {
$enquire->set_sort_by_relevance_then_value(TS, $desc);
} else {
}
# read-write
-sub stemmer { Search::Xapian::Stem->new($LANG) }
+sub stemmer { $X{Stem}->new($LANG) }
# read-only
sub qp {
return $qp if $qp;
my $xdb = xdb($self);
# new parser
- $qp = Search::Xapian::QueryParser->new;
+ $qp = $X{QueryParser}->new;
$qp->set_default_op(OP_AND());
$qp->set_database($xdb);
$qp->set_stemmer($self->stemmer);
$qp->set_stemming_strategy(STEM_SOME());
$qp->set_max_wildcard_expansion(100);
- $qp->add_valuerangeprocessor(
- Search::Xapian::NumberValueRangeProcessor->new(YYYYMMDD, 'd:'));
- $qp->add_valuerangeprocessor(
- Search::Xapian::NumberValueRangeProcessor->new(DT, 'dt:'));
+ my $nvrp = $X{NumberValueRangeProcessor};
+ $qp->add_valuerangeprocessor($nvrp->new(YYYYMMDD, 'd:'));
+ $qp->add_valuerangeprocessor($nvrp->new(DT, 'dt:'));
while (my ($name, $prefix) = each %bool_pfx_external) {
$qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix);
use PublicInbox::OverIdx;
use PublicInbox::Spawn qw(spawn);
use PublicInbox::Git qw(git_unquote);
-
+my $X = \%PublicInbox::Search::X;
+my ($DB_CREATE_OR_OPEN, $DB_OPEN);
use constant {
BATCH_BYTES => defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
0x7fffffff : 1_000_000,
undef;
}
+# Load the writable parts of whichever Xapian binding
+# PublicInbox::Search::load_xapian() selected, and cache what this
+# package needs from it: the Document, TermGenerator and
+# WritableDatabase class names in %$X, the sortable_serialise alias,
+# and the $DB_CREATE_OR_OPEN / $DB_OPEN flag values.
+#
+# Returns 1 on success (or if already loaded), returns nothing when no
+# Xapian binding is available, and dies if the WritableDatabase class
+# or either DB_* constant cannot be loaded.
+sub load_xapian_writable () {
+ return 1 if $X->{WritableDatabase};
+ PublicInbox::Search::load_xapian() or return;
+ my $xap = $PublicInbox::Search::Xap;
+ my %cls = map { $_ => $xap.'::'.$_ }
+ qw(Document TermGenerator WritableDatabase);
+ eval 'require '.$cls{WritableDatabase} or die;
+ # string evals because the package name is only known at runtime;
+ # an undefined constant must not silently become an undef flag
+ my $create_or_open = eval($xap.'::DB_CREATE_OR_OPEN()');
+ die if $@;
+ my $open = eval($xap.'::DB_OPEN()');
+ die if $@;
+ *sortable_serialise = $xap.'::sortable_serialise';
+ ($DB_CREATE_OR_OPEN, $DB_OPEN) = ($create_or_open, $open);
+ # publish the class names last: $X->{WritableDatabase} doubles as
+ # the "already loaded" marker tested at the top of this sub, so it
+ # must only become true once everything above has succeeded
+ $X->{$_} = $cls{$_} for keys %cls;
+ 1;
+}
+
sub _xdb_acquire {
my ($self) = @_;
my $flag;
my $dir = $self->xdir;
if (need_xapian($self)) {
croak 'already acquired' if $self->{xdb};
- PublicInbox::Search::load_xapian();
- require Search::Xapian::WritableDatabase;
- $flag = $self->{creat} ?
- Search::Xapian::DB_CREATE_OR_OPEN() :
- Search::Xapian::DB_OPEN();
+ load_xapian_writable();
+ $flag = $self->{creat} ? $DB_CREATE_OR_OPEN : $DB_OPEN;
}
if ($self->{creat}) {
require File::Path;
}
}
return unless defined $flag;
- my $xdb = eval { Search::Xapian::WritableDatabase->new($dir, $flag) };
+ my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
if ($@) {
die "Failed opening $dir: ", $@;
}
sub add_val ($$$) {
my ($doc, $col, $num) = @_;
- $num = Search::Xapian::sortable_serialise($num);
+ $num = sortable_serialise($num);
$doc->add_value($col, $num);
}
sub add_xapian ($$$$$) {
my ($self, $mime, $num, $oid, $mids, $mid0) = @_;
my $smsg = PublicInbox::SearchMsg->new($mime);
- my $doc = Search::Xapian::Document->new;
+ my $doc = $X->{Document}->new;
my $subj = $smsg->subject;
add_val($doc, PublicInbox::Search::TS(), $smsg->ts);
my @ds = gmtime($smsg->ds);
my $tg = $self->{term_generator};
return $tg if $tg;
- $tg = Search::Xapian::TermGenerator->new;
+ $tg = $X->{TermGenerator}->new;
$tg->set_stemmer($self->stemmer);
$self->{term_generator} = $tg;
sub get_val ($$) {
my ($doc, $col) = @_;
- Search::Xapian::sortable_unserialise($doc->get_value($col));
+ # sortable_unserialise is defined by PublicInbox::Search::load_xapian()
+ sortable_unserialise($doc->get_value($col));
}
sub to_doc_data {
my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/;
my @need;
for my $mod (@mods) {
- eval "require $mod";
+ if ($mod eq 'Search::Xapian') {
+ require PublicInbox::Search;
+ PublicInbox::Search::load_xapian() and next;
+ } elsif ($mod eq 'Search::Xapian::WritableDatabase') {
+ require PublicInbox::SearchIdx;
+ PublicInbox::SearchIdx::load_xapian_writable() and next;
+ } else {
+ eval "require $mod";
+ }
push @need, $mod if $@;
}
return unless @need;
# Also, shard count may change while -watch is running
# due to "xcpdb --reshard"
if (-d $xpfx) {
+ require PublicInbox::Search;
+ PublicInbox::Search::load_xapian();
+ my $XapianDatabase = $PublicInbox::Search::X{Database};
foreach my $shard (<$xpfx/*>) {
-d $shard && $shard =~ m!/[0-9]+\z! or next;
eval {
- Search::Xapian::Database->new($shard)->close;
+ $XapianDatabase->new($shard)->close;
$n++;
};
}
use warnings;
use PublicInbox::Spawn qw(which spawn);
use PublicInbox::Over;
-use PublicInbox::Search;
+use PublicInbox::SearchIdx;
use File::Temp ();
use File::Path qw(remove_tree);
use File::Basename qw(dirname);
my ($ibx, $im, $reindex) = @_;
if ($ibx->{version} == 1) {
my $dir = $ibx->search->xdir(1);
- my $xdb = Search::Xapian::Database->new($dir);
+ my $xdb = $PublicInbox::Search::X{Database}->new($dir);
if (my $lc = $xdb->get_metadata('last_commit')) {
$reindex->{from} = $lc;
}
if (!$opt->{-coarse_lock}) {
$reindex = $opt->{reindex} = {};
$from = $reindex->{from} = [];
- require Search::Xapian::WritableDatabase;
+ require PublicInbox::SearchIdx;
+ PublicInbox::SearchIdx::load_xapian_writable();
}
$ibx->umask_prepare;
sub cpdb_retryable ($$) {
my ($src, $pfx) = @_;
- if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') {
+ if (ref($@) =~ /\bDatabaseModifiedError\b/) {
warn "$pfx Xapian DB modified, reopening and retrying\n";
$src->reopen;
return 1;
my $new = $newdir->dirname;
my ($src, $cur_shard);
my $reshard;
+ PublicInbox::SearchIdx::load_xapian_writable() or die;
+ my $XapianDatabase = $PublicInbox::Search::X{Database};
if (ref($old) eq 'ARRAY') {
($cur_shard) = ($new =~ m!xap[0-9]+/([0-9]+)\b!);
defined $cur_shard or
# resharding, M:N copy means have full read access
foreach (@$old) {
if ($src) {
- my $sub = Search::Xapian::Database->new($_);
+ my $sub = $XapianDatabase->new($_);
$src->add_database($sub);
} else {
- $src = Search::Xapian::Database->new($_);
+ $src = $XapianDatabase->new($_);
}
}
} else {
- $src = Search::Xapian::Database->new($old);
+ $src = $XapianDatabase->new($old);
}
my ($tmp, $ft);
# like copydatabase(1), be sure we don't overwrite anything in case
# of other bugs:
- my $creat = Search::Xapian::DB_CREATE();
- my $dst = Search::Xapian::WritableDatabase->new($tmp, $creat);
+ my $creat = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
+ die if $@;
+ my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase};
+ my $dst = $XapianWritableDatabase->new($tmp, $creat);
my $pr = $opt->{-progress};
my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
# individually.
$src = undef;
foreach (@$old) {
- my $old = Search::Xapian::Database->new($_);
+ my $old = $XapianDatabase->new($_);
cpdb_loop($old, $dst, $pr_data, $cur_shard, $reshard);
}
} else {
SKIP: {
require PublicInbox::Search;
- PublicInbox::Search::load_xapian() or skip 'Search::Xapian missing', 2;
+ PublicInbox::Search::load_xapian() or
+ skip('Xapian perl binding missing', 2);
foreach my $l (qw(medium full)) {
import_index_incremental($PI_TEST_VERSION, $l, $mime);
}
my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common
Plack::Test URI::Escape Plack::Builder Plack::Test);
require_mods(@mods);
-use_ok($_) for @mods;
+use_ok($_) for (qw(HTTP::Request::Common Plack::Test));
use_ok 'PublicInbox::V2Writable';
my ($repo, $for_destroy) = tmpdir();
my $ibx = PublicInbox::Inbox->new({
my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
URI::Escape Plack::Builder);
require_mods(@mods);
-use_ok $_ foreach (@mods, qw(PublicInbox::SearchIdx));
+use_ok($_) for (qw(HTTP::Request::Common Plack::Test));
my ($tmpdir, $for_destroy) = tmpdir();
my $ibx = PublicInbox::Inbox->new({
use PublicInbox::Config;
use PublicInbox::WWW;
use PublicInbox::MID qw(mids);
-my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
- URI::Escape Plack::Builder);
-require_mods(@mods);
-use_ok($_) for @mods;
+require_mods(qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
+ URI::Escape Plack::Builder));
+use_ok($_) for (qw(HTTP::Request::Common Plack::Test));
use_ok 'PublicInbox::V2Writable';
my ($inboxdir, $for_destroy) = tmpdir();
my $ibx = {
require_git('2.6');
use PublicInbox::MIME;
use PublicInbox::InboxWritable;
+use PublicInbox::Search;
my $mime = PublicInbox::MIME->create(
header => [
# ensure docids in Xapian match NNTP article numbers
my $tot = 0;
my %tmp = %nums;
+ my $XapianDatabase = $PublicInbox::Search::X{Database};
foreach my $d (@new_shards) {
- my $xdb = Search::Xapian::Database->new($d);
+ my $xdb = $XapianDatabase->new($d);
$tot += $xdb->get_doccount;
my $it = $xdb->postlist_begin('');
my $end = $xdb->postlist_end('');