X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearch.pm;h=5c9dccb53af327282d10df960f7f112801986e58;hb=55b707d788ce13696e4411389583e720ea6dab01;hp=60fc861ab63da8c2b07d612c5a9ca0d6160630fe;hpb=ec32087fbd0af943017d2047eedbdbd59d3291da;p=public-inbox.git diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 60fc861a..5c9dccb5 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 all contributors +# Copyright (C) 2015-2019 all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files or flags # @@ -13,24 +13,44 @@ use constant YYYYMMDD => 1; # Date: header for searching in the WWW UI use constant DT => 2; # Date: YYYYMMDDHHMMSS use PublicInbox::SearchMsg; -use PublicInbox::MIME; -use PublicInbox::MID qw/id_compress/; use PublicInbox::Over; my $QP_FLAGS; +our %X = map { $_ => 0 } qw(BoolWeight Database Enquire + NumberValueRangeProcessor QueryParser Stem); +our $Xap; # 'Search::Xapian' or 'Xapian' +my $ENQ_ASCENDING; + sub load_xapian () { - $QP_FLAGS ||= eval { - require Search::Xapian; - Search::Xapian->import(qw(:standard)); + return 1 if defined $Xap; + for my $x (qw(Search::Xapian Xapian)) { + eval "require $x"; + next if $@; + + $x->import(qw(:standard)); + $Xap = $x; + $X{$_} = $Xap.'::'.$_ for (keys %X); + + # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm, + # so lets hope this part of the ABI is stable because it's + # just an integer: + $ENQ_ASCENDING = $x eq 'Xapian' ? + 1 : Search::Xapian::ENQ_ASCENDING(); + # for SearchMsg: + *PublicInbox::SearchMsg::sortable_unserialise = + $Xap.'::sortable_unserialise'; # n.b. FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector # FLAG_PHRASE also seems to cause performance problems chert # (and probably earlier Xapian DBs). glass seems fine... # TODO: make this an option, maybe? # or make indexlevel=medium as default - FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD(); - }; -}; + $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() | + FLAG_WILDCARD(); + return 1; + } + undef; +} # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails @@ -54,6 +74,9 @@ use constant { # 13 - fix threading for empty References/In-Reply-To # (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0) # 14 - fix ghost root vivification + # 15 - see public-inbox-v2-format(5) + # further bumps likely unnecessary, we'll suggest in-place + # "--reindex" use for further fixes and tweaks SCHEMA_VERSION => 15, }; @@ -126,9 +149,9 @@ chomp @HELP; sub xdir ($;$) { my ($self, $rdonly) = @_; if ($self->{version} == 1) { - "$self->{mainrepo}/public-inbox/xapian" . SCHEMA_VERSION; + "$self->{inboxdir}/public-inbox/xapian" . SCHEMA_VERSION; } else { - my $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION; + my $dir = "$self->{inboxdir}/xap" . SCHEMA_VERSION; return $dir if $rdonly; my $shard = $self->{shard}; @@ -145,7 +168,7 @@ sub _xdb ($) { if ($self->{version} >= 2) { foreach my $shard (<$dir/*>) { -d $shard && $shard =~ m!/[0-9]+\z! or next; - my $sub = Search::Xapian::Database->new($shard); + my $sub = $X{Database}->new($shard); if ($xdb) { $xdb->add_database($sub); } else { @@ -155,7 +178,7 @@ sub _xdb ($) { } } else { $slow_phrase = -f "$dir/iamchert"; - $xdb = Search::Xapian::Database->new($dir); + $xdb = $X{Database}->new($dir); } $$qpf |= FLAG_PHRASE() unless $slow_phrase; $xdb; @@ -173,7 +196,7 @@ sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; my $self = bless { - mainrepo => $ibx->{mainrepo}, + inboxdir => $ibx->{inboxdir}, altid => $ibx->{altid}, version => $ibx->{version} // 1, }, $class; @@ -206,20 +229,20 @@ sub query { } sub retry_reopen { - my ($self, $cb) = @_; + my ($self, $cb, $arg) = @_; for my $i (1..10) { if (wantarray) { my @ret; - eval { @ret = $cb->() }; + eval { @ret = $cb->($arg) }; return @ret unless $@; } else { my $ret; - eval { $ret = $cb->() }; + eval { $ret = $cb->($arg) }; return $ret unless $@; } # Exception: The revision being read has been discarded - # you should call Xapian::Database::reopen() - if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') { + if (ref($@) =~ /\bDatabaseModifiedError\b/) { warn "reopen try #$i on $@\n"; reopen($self); } else { @@ -234,19 +257,19 @@ sub retry_reopen { sub _do_enquire { my ($self, $query, $opts) = @_; - retry_reopen($self, sub { _enquire_once($self, $query, $opts) }); + retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]); } -sub _enquire_once { - my ($self, $query, $opts) = @_; +sub _enquire_once { # retry_reopen callback + my ($self, $query, $opts) = @{$_[0]}; my $xdb = xdb($self); - my $enquire = Search::Xapian::Enquire->new($xdb); + my $enquire = $X{Enquire}->new($xdb); $enquire->set_query($query); $opts ||= {}; my $desc = !$opts->{asc}; if (($opts->{mset} || 0) == 2) { - $enquire->set_docid_order(Search::Xapian::ENQ_ASCENDING()); - $enquire->set_weighting_scheme(Search::Xapian::BoolWeight->new); + $enquire->set_docid_order($ENQ_ASCENDING); + $enquire->set_weighting_scheme($X{BoolWeight}->new); } elsif ($opts->{relevance}) { $enquire->set_sort_by_relevance_then_value(TS, $desc); } else { @@ -256,16 +279,14 @@ sub _enquire_once { my $limit = $opts->{limit} || 50; my $mset = $enquire->get_mset($offset, $limit); return $mset if $opts->{mset}; - my @msgs = map { - PublicInbox::SearchMsg->load_doc($_->get_document); - } $mset->items; + my @msgs = map { PublicInbox::SearchMsg::from_mitem($_) } $mset->items; return \@msgs unless wantarray; ($mset->get_matches_estimated, \@msgs) } # read-write -sub stemmer { Search::Xapian::Stem->new($LANG) } +sub stemmer { $X{Stem}->new($LANG) } # read-only sub qp { @@ -275,16 +296,15 @@ sub qp { return $qp if $qp; my $xdb = xdb($self); # new parser - $qp = Search::Xapian::QueryParser->new; + $qp = $X{QueryParser}->new; $qp->set_default_op(OP_AND()); $qp->set_database($xdb); $qp->set_stemmer($self->stemmer); $qp->set_stemming_strategy(STEM_SOME()); $qp->set_max_wildcard_expansion(100); - $qp->add_valuerangeprocessor( - Search::Xapian::NumberValueRangeProcessor->new(YYYYMMDD, 'd:')); - $qp->add_valuerangeprocessor( - Search::Xapian::NumberValueRangeProcessor->new(DT, 'dt:')); + my $nvrp = $X{NumberValueRangeProcessor}; + $qp->add_valuerangeprocessor($nvrp->new(YYYYMMDD, 'd:')); + $qp->add_valuerangeprocessor($nvrp->new(DT, 'dt:')); while (my ($name, $prefix) = each %bool_pfx_external) { $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix); @@ -314,11 +334,6 @@ EOF $self->{query_parser} = $qp; } -sub lookup_article { - my ($self, $num) = @_; - $self->{over_ro}->get_art($num); -} - sub help { my ($self) = @_; $self->qp; # parse altids