X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearch.pm;h=273cc57cf94ba8b12d18acfcefd4aaa13e8a2c7a;hb=771dda802ec8c13dc13a1daabcefee9f3df9bb38;hp=7e19e616a2180e28fe7470698b3d555360ee4c93;hpb=4d594e98063aaad1ce9a90709af7edc5c44a0163;p=public-inbox.git
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 7e19e616..273cc57c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -1,10 +1,11 @@
-# Copyright (C) 2015-2021 all contributors
+# Copyright (C) all contributors
# License: AGPL-3.0+
# based on notmuch, but with no concept of folders, files or flags
#
# Read-only search interface for use by the web and NNTP interfaces
package PublicInbox::Search;
use strict;
+use v5.10.1;
use parent qw(Exporter);
our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
use List::Util qw(max);
@@ -15,7 +16,7 @@ use Carp ();
# compatibility with old indices (so don't change them it)
use constant {
TS => 0, # Received: in Unix time (IMAP INTERNALDATE, JMAP receivedAt)
- YYYYMMDD => 1, # Date: header for searching in the WWW UI
+ YYYYMMDD => 1, # redundant with DT below
DT => 2, # Date: YYYYMMDDHHMMSS (IMAP SENT*, JMAP sentAt)
# added for public-inbox 1.6.0+
@@ -117,9 +118,10 @@ my %bool_pfx_external = (
dfpre => 'XDFPRE',
dfpost => 'XDFPOST',
dfblob => 'XDFPRE XDFPOST',
+ patchid => 'XDFID',
);
-my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST';
+my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID';
my %prob_prefix = (
# for mairix compatibility
s => 'S',
@@ -154,12 +156,9 @@ my %prob_prefix = (
our @HELP = (
's:' => 'match within Subject e.g. s:"a quick brown fox"',
'd:' => < < 'match within message body, including text attachments',
'nq:' => 'match non-quoted text within message body',
@@ -180,6 +179,10 @@ EOF
'dfpre:' => 'match pre-image git blob ID',
'dfpost:' => 'match post-image git blob ID',
'dfblob:' => 'match either pre or post-image git blob ID',
+ 'patchid:' => "match `git patch-id --stable' output",
+ 'rt:' => <{shard})) {
$self->{xpfx};
- } else { # v2 only:
+ } else { # v2 + extindex only:
"$self->{xpfx}/$self->{shard}";
}
}
@@ -199,7 +202,7 @@ sub xdb_shards_flat ($) {
my (@xdb, $slow_phrase);
load_xapian();
$self->{qp_flags} //= $QP_FLAGS;
- if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+ if ($xpfx =~ m!/xapian[0-9]+\z!) {
@xdb = ($X{Database}->new($xpfx));
$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
} else {
@@ -234,12 +237,12 @@ sub mset_to_artnums {
sub xdb ($) {
my ($self) = @_;
- $self->{xdb} //= do {
+ $self->{xdb} // do {
my @xdb = $self->xdb_shards_flat or return;
$self->{nshard} = scalar(@xdb);
my $xdb = shift @xdb;
$xdb->add_database($_) for @xdb;
- $xdb;
+ $self->{xdb} = $xdb;
};
}
@@ -247,10 +250,10 @@ sub new {
my ($class, $ibx) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian';
- bless {
- xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION,
- altid => $ibx->{altid},
- }, $class;
+ my $xpfx = "$ibx->{inboxdir}/$xap".SCHEMA_VERSION;
+ my $self = bless { xpfx => $xpfx }, $class;
+ $self->{altid} = $ibx->{altid} if defined($ibx->{altid});
+ $self;
}
sub reopen {
@@ -332,7 +335,7 @@ sub date_parse_prepare {
push @$to_parse, $x;
$x = "\0%s$#$to_parse\0";
}
- $r[1] //= "\0%s+\0";
+ $r[1] //= "\0%s+\0"; # add 1 day
}
"$pfx:".join('..', @r).$end;
}
@@ -342,9 +345,12 @@ sub date_parse_finalize {
# git-rev-parse can handle any number of args up to system
# limits (around (4096*32) bytes on Linux).
my @r = $git->date_parse(@$to_parse);
- my $i;
- $_[2] =~ s/\0(%[%YmdHMSs]+)([0-9\+]+)\0/strftime($1,
- gmtime($2 eq '+' ? ($r[$i]+86400) : $r[$i=$2+0]))/sge;
+ # n.b. git respects TZ, times stored in SQLite/Xapian are always UTC,
+ # and gmtime doesn't seem to do the right thing when TZ!=UTC
+ my ($i, $t);
+ $_[2] =~ s/\0(%[%YmdHMSs]+)([0-9\+]+)\0/
+ $t = $2 eq '+' ? ($r[$i]+86400) : $r[$i=$2+0];
+ $1 eq '%s' ? $t : strftime($1, gmtime($t))/sge;
}
# n.b. argv never has NUL, though we'll need to filter it out
@@ -395,18 +401,15 @@ sub retry_reopen {
my ($self, $cb, @arg) = @_;
for my $i (1..10) {
if (wantarray) {
- my @ret;
- eval { @ret = $cb->($self, @arg) };
+ my @ret = eval { $cb->($self, @arg) };
return @ret unless $@;
} else {
- my $ret;
- eval { $ret = $cb->($self, @arg) };
+ my $ret = eval { $cb->($self, @arg) };
return $ret unless $@;
}
# Exception: The revision being read has been discarded -
# you should call Xapian::Database::reopen()
if (ref($@) =~ /\bDatabaseModifiedError\b/) {
- warn "# reopen try #$i on $@\n";
reopen($self);
} else {
# let caller decide how to spew, because ExtMsg queries
@@ -457,8 +460,9 @@ sub _enquire_once { # retry_reopen callback
$enquire->set_sort_by_relevance_then_value(TS, !$opts->{asc});
}
- # `mairix -t / --threads' or JMAP collapseThreads
- if ($opts->{threads} && has_threadid($self)) {
+ # `lei q -t / --threads' or JMAP collapseThreads; but don't collapse
+ # on `-tt' ({threads} > 1) which sets the Flagged|Important keyword
+ if (($opts->{threads} // 0) == 1 && has_threadid($self)) {
$enquire->set_collapse_key(THREADID);
}
$enquire->get_mset($opts->{offset} || 0, $opts->{limit} || 50);
@@ -540,9 +544,10 @@ sub help {
\@ret;
}
+# always returns a scalar value
sub int_val ($$) {
my ($doc, $col) = @_;
- my $val = $doc->get_value($col) or return; # undefined is '' in Xapian
+ my $val = $doc->get_value($col) or return undef; # undef is '' in Xapian
sortable_unserialise($val) + 0; # PV => IV conversion
}
@@ -568,4 +573,12 @@ sub xap_terms ($$;@) {
wantarray ? sort(keys(%ret)) : \%ret;
}
+# get combined docid from over.num:
+# (not generic Xapian, only works with our sharding scheme)
+sub num2docid ($$) {
+ my ($self, $num) = @_;
+ my $nshard = $self->{nshard};
+ ($num - 1) * $nshard + $num % $nshard + 1;
+}
+
1;