X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearch.pm;h=58653c9e8fc41f590bac28b4b0814756ff3960e3;hb=0c8106d44f317175e122744b43407bf067183175;hp=b1d38fb901b2f0fcce07513bd559be71b812fa60;hpb=dbf250d9423ccc38377c35eef8d43e3e11723253;p=public-inbox.git diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b1d38fb9..58653c9e 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -6,7 +6,7 @@ package PublicInbox::Search; use strict; use parent qw(Exporter); -our @EXPORT_OK = qw(retry_reopen); +our @EXPORT_OK = qw(retry_reopen int_val); use List::Util qw(max); # values for searching, changing the numeric value breaks @@ -58,7 +58,11 @@ our $QP_FLAGS; our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query); our $Xap; # 'Search::Xapian' or 'Xapian' our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor') -our $ENQ_ASCENDING; + +# ENQ_DESCENDING and ENQ_ASCENDING weren't in SWIG Xapian.pm prior to 1.4.16, +# let's hope the ABI is stable +our $ENQ_DESCENDING = 0; +our $ENQ_ASCENDING = 1; sub load_xapian () { return 1 if defined $Xap; @@ -84,13 +88,8 @@ sub load_xapian () { 'NumberRangeProcessor' : 'NumberValueRangeProcessor'); $X{$_} = $Xap.'::'.$_ for (keys %X); - # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm, - # so lets hope this part of the ABI is stable because it's - # just an integer: - $ENQ_ASCENDING = $x eq 'Xapian' ? - 1 : Search::Xapian::ENQ_ASCENDING(); - *sortable_serialise = $x.'::sortable_serialise'; + *sortable_unserialise = $x.'::sortable_unserialise'; # n.b. FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector # FLAG_PHRASE also seems to cause performance problems chert @@ -191,41 +190,28 @@ sub xdir ($;$) { } } -sub xdb_sharded { +# returns all shards as separate Xapian::Database objects w/o combining +sub xdb_shards_flat ($) { my ($self) = @_; - opendir(my $dh, $self->{xpfx}) or return; # not initialized yet - - # We need numeric sorting so shard[0] is first for reading - # Xapian metadata, if needed - my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return; + my $xpfx = $self->{xpfx}; my (@xdb, $slow_phrase); - for (0..$last) { - my $shard_dir = "$self->{xpfx}/$_"; - if (-d $shard_dir && -r _) { + load_xapian(); + if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) { + @xdb = ($X{Database}->new($xpfx)); + $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert"; + } else { + opendir(my $dh, $xpfx) or return (); # not initialized yet + # We need numeric sorting so shard[0] is first for reading + # Xapian metadata, if needed + my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return (); + for (0..$last) { + my $shard_dir = "$self->{xpfx}/$_"; push @xdb, $X{Database}->new($shard_dir); $slow_phrase ||= -f "$shard_dir/iamchert"; - } else { # gaps from missing epochs throw off mdocid() - warn "E: $shard_dir missing or unreadable\n"; - return; } + $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; } - $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; - $self->{nshard} = scalar(@xdb); - my $xdb = shift @xdb; - $xdb->add_database($_) for @xdb; - $xdb; -} - -sub _xdb { - my ($self) = @_; - my $dir = xdir($self, 1); - $self->{qp_flags} //= $QP_FLAGS; - if ($self->{ibx_ver} >= 2) { - xdb_sharded($self); - } else { - $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert"; - $X{Database}->new($dir); - } + @xdb; } # v2 Xapian docids don't conflict, so they're identical to @@ -239,37 +225,30 @@ sub mdocid { sub mset_to_artnums { my ($self, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; [ map { mdocid($nshard, $_) } $mset->items ]; } sub xdb ($) { my ($self) = @_; $self->{xdb} //= do { - load_xapian(); - $self->_xdb; + $self->{qp_flags} //= $QP_FLAGS; + my @xdb = $self->xdb_shards_flat or return; + $self->{nshard} = scalar(@xdb); + my $xdb = shift @xdb; + $xdb->add_database($_) for @xdb; + $xdb; }; } -sub xpfx_init ($) { - my ($self) = @_; - if ($self->{ibx_ver} == 1) { - $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION; - } else { - $self->{xpfx} .= '/xap'.SCHEMA_VERSION; - } -} - sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; - my $self = bless { - xpfx => $ibx->{inboxdir}, # for xpfx_init + my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian'; + bless { + xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION, altid => $ibx->{altid}, - ibx_ver => $ibx->version, }, $class; - xpfx_init($self); - $self; } sub reopen { @@ -286,7 +265,6 @@ sub mset { $opts ||= {}; my $qp = $self->{qp} //= qparse_new($self); my $query = $qp->parse_query($query_string, $self->{qp_flags}); - $opts->{relevance} = 1 unless exists $opts->{relevance}; _do_enquire($self, $query, $opts); } @@ -344,13 +322,17 @@ sub _enquire_once { # retry_reopen callback $enquire->set_query($query); $opts ||= {}; my $desc = !$opts->{asc}; - if (($opts->{mset} || 0) == 2) { # mset == 2: ORDER BY docid/UID + my $rel = $opts->{relevance} // 0; + if ($rel == -1) { # ORDER BY docid/UID + $enquire->set_weighting_scheme($X{BoolWeight}->new); $enquire->set_docid_order($ENQ_ASCENDING); + } elsif ($rel == 0) { + $enquire->set_sort_by_value_then_relevance(TS, $desc); + } elsif ($rel == -2) { $enquire->set_weighting_scheme($X{BoolWeight}->new); - } elsif ($opts->{relevance}) { + $enquire->set_docid_order($ENQ_DESCENDING); + } else { # rel > 0 $enquire->set_sort_by_relevance_then_value(TS, $desc); - } else { - $enquire->set_sort_by_value_then_relevance(TS, $desc); } # `mairix -t / --threads' or JMAP collapseThreads @@ -362,7 +344,7 @@ sub _enquire_once { # retry_reopen callback sub mset_to_smsg { my ($self, $ibx, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; my $i = 0; my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; my @msgs = sort { @@ -436,4 +418,10 @@ sub help { \@ret; } +sub int_val ($$) { + my ($doc, $col) = @_; + my $val = $doc->get_value($col) or return; # undefined is '' in Xapian + sortable_unserialise($val) + 0; # PV => IV conversion +} + 1;