use Search::Xapian qw/:standard/;
use PublicInbox::SearchMsg;
-use Email::MIME;
+use PublicInbox::MIME;
use PublicInbox::MID qw/mid_clean id_compress/;
# This is English-only, everything else is non-standard and may be confused as
# 10 - optimize doc for NNTP overviews
# 11 - merge threads when vivifying ghosts
# 12 - change YYYYMMDD value column to numeric
- SCHEMA_VERSION => 12,
+ # 13 - fix threading for empty References/In-Reply-To
+ # (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0)
+ # 14 - fix ghost root vivification
+ SCHEMA_VERSION => 14,
# n.b. FLAG_PURE_NOT is expensive and not suitable for a public website
# as it could become a denial-of-service vector
tc => 'XTO XCC',
c => 'XCC',
tcf => 'XTO XCC A',
+ a => 'XTO XCC A',
b => 'XNQ XQUOT',
bs => 'XNQ XQUOT S',
+ n => 'XFN',
- # n.b.: leaving out "a:" alias for "tcf:" even though
- # mairix supports it. It is only mentioned in passing in mairix(1)
- # and the extra two letters are not significantly longer.
q => 'XQUOT',
nq => 'XNQ',
# default:
- '' => 'XMID S A XNQ XQUOT',
+ '' => 'XMID S A XNQ XQUOT XFN',
);
# not documenting m: and mid: for now, since using the URLs works w/o Xapian
our @HELP = (
- 's:' => <<EOF,
-match within Subject only e.g. s:"a quick brown fox"
-This is a probabilistic search with support for stemming
-and wildcards '*'
-EOF
+ 's:' => 'match within Subject e.g. s:"a quick brown fox"',
'd:' => <<EOF,
date range as YYYYMMDD e.g. d:19931002..20101002
Open-ended ranges such as d:19931002.. and d:..20101002
-are also supported.
+are also supported
EOF
+ 'b:' => 'match within message body, including text attachments',
+ 'nq:' => 'match non-quoted text within message body',
+ 'q:' => 'match quoted text within message body',
+ 'n:' => 'match filename of attachment(s)',
+ 't:' => 'match within the To header',
+ 'c:' => 'match within the Cc header',
+ 'f:' => 'match within the From header',
+ 'a:' => 'match within the To, Cc, and From headers',
+ 'tc:' => 'match within the To and Cc headers',
+ 'bs:' => 'match within the Subject and body',
);
-# TODO: (from mairix, some of these are maybe)
-# b (body), f (From:), c (Cc:), n (attachment), t (To:)
-# tc (To:+Cc:), bs (body + Subject), tcf (To: +Cc: +From:)
-#
-# Non-mairix:
+chomp @HELP;
+# TODO:
# df (filenames from diff)
-# nq (non-quoted body)
# da (diff a/ removed lines)
# db (diff b/ added lines)
# Return the entry stored in %all_pfx under the given key
# (undef when the key is not present).
sub xpfx {
    my ($name) = @_;
    return $all_pfx{$name};
}
-our %PFX2TERM_RMAP;
-my %meta_pfx = (mid => 1, thread => 1, path => 1);
-while (my ($k, $v) = each %all_pfx) {
- $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k};
-}
-
my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
sub xdir {
}
$opts ||= {};
$opts->{limit} ||= 1000;
+
+ # always sort threads by timestamp, this makes life easier
+ # for the threading algorithm (in SearchThread.pm)
+ $opts->{asc} = 1;
+
_do_enquire($self, $qtid, $opts);
}
# Run $cb, retrying when Xapian reports that the database was modified
# while it was being read.
#
# $cb is called in scalar context with no arguments, at most 10 times.
# The value from the first attempt that does not throw is returned.
# A Search::Xapian::DatabaseModifiedError causes reopen($self) to be
# called before the next attempt; any other exception is propagated to
# the caller unchanged.  If all attempts fail with DatabaseModifiedError
# this dies, so an exhausted retry loop is never mistaken for an
# empty (undef) result.
sub retry_reopen {
    my ($self, $cb) = @_;
    for (1..10) {
        my $ret = eval { $cb->() };
        return $ret unless $@;
        # Exception: The revision being read has been discarded -
        # you should call Xapian::Database::reopen()
        if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') {
            reopen($self);
        } else {
            # bare die propagates the exception still held in $@
            die;
        }
    }
    die "Too many Xapian database modifications in progress\n";
}

# Run $query with $opts through _enquire_once, wrapped in retry_reopen
# so a concurrent database modification triggers a reopen and retry.
sub _do_enquire {
    my ($self, $query, $opts) = @_;
    retry_reopen($self, sub { _enquire_once($self, $query, $opts) });
}
+
sub _enquire_once {
my ($self, $query, $opts) = @_;
my $enquire = $self->enquire;
/\Aserial:(\w+):/ or next;
my $pfx = $1;
push @$user_pfx, "$pfx:", <<EOF;
-alternate serial number e.g. $pfx:12345
+alternate serial number e.g. $pfx:12345 (boolean)
EOF
# gmane => XGMANE
$qp->add_boolean_prefix($pfx, 'X'.uc($pfx));
}
+ chomp @$user_pfx;
}
while (my ($name, $prefix) = each %prob_prefix) {
# raises on error:
my $doc = $self->{xdb}->get_document($doc_id);
$smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
- $smsg->doc_id($doc_id);
+ $smsg->{doc_id} = $doc_id;
}
$smsg;
}
# Look up $mid via lookup_message and hand the document it carries to
# PublicInbox::SearchMsg->load_doc.  The lookup runs under retry_reopen,
# so a database modification during the read leads to a reopen and retry.
# Yields nothing when lookup_message finds no entry for $mid.
sub lookup_mail { # no ghosts!
    my ($self, $mid) = @_;
    my $load = sub {
        my $smsg = lookup_message($self, $mid);
        return unless $smsg;
        return PublicInbox::SearchMsg->load_doc($smsg->{doc});
    };
    return retry_reopen($self, $load);
}
sub find_unique_doc_id {
}
# normalize subjects so they are suitable as pathnames for URLs
+# XXX: consider for removal
sub subject_path {
my $subj = pop;
$subj = subject_normalized($subj);
$subj;
}
-# for doc data
-sub subject_summary {
- my $subj = pop;
- my $max = 68;
- if (length($subj) > $max) {
- my @subj = split(/\s+/, $subj);
- $subj = '';
- my $l;
-
- while ($l = shift @subj) {
- my $new = $subj . $l . ' ';
- last if length($new) >= $max;
- $subj = $new;
- }
- if ($subj ne '') {
- my $r = scalar @subj ? ' ...' : '';
- $subj =~ s/ \z/$r/s;
- } else {
- # subject has one REALLY long word, and NOT spam? wtf
- @subj = ($l =~ /\A(.{1,72})/);
- $subj = $subj[0] . ' ...';
- }
- }
- $subj;
-}
-
sub enquire {
my ($self) = @_;
$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});