'doc-url=/lurker&format=en.html&query=id:%s'
);
+sub PARTIAL_MAX () { 100 } # cap on partial Message-ID matches: query limit and cross-inbox cutoff
+
+# Look up Message-IDs in $srch which partially match $mid.
+#
+# Progressively looser queries are tried in order; the first query which
+# yields at least one hit wins:
+#   - $mid itself as a wildcard prefix
+#   - $mid with its trailing delimiter and last word chopped off
+#     (with and without the delimiter, exact and wildcard)
+#   - for a $mid containing a run of 9+ word characters, every word of
+#     3+ characters as its own "m:" term (optionally wildcarding the
+#     last word when it is longer than 8 characters)
+#
+# $srch must provide ->query($query_string, \%opt).
+# Returns an arrayref of Message-ID strings from the first matching query
+# (PARTIAL_MAX is passed to the query as its limit), or undef when no
+# query matched.  Rethrows any query error other than a
+# Search::Xapian::QueryParserError.
+sub search_partial ($$) {
+	my ($srch, $mid) = @_;
+	my $opt = { limit => PARTIAL_MAX, mset => 2 };
+	my @try = ("m:$mid*");
+	my $chop = $mid;
+
+	# strip the trailing "<delimiter><word>" and retry with what is left
+	if ($chop =~ s/(\W+)(\w*)\z//) {
+		my ($delim, $word) = ($1, $2);
+		if (length($word)) {
+			push @try, "m:$chop$delim";
+			push @try, "m:$chop$delim*";
+		}
+		push @try, "m:$chop";
+		push @try, "m:$chop*";
+	}
+
+	# break out long words individually to search for, because
+	# too many messages begin with "Pine.LNX." (or "alpine" or "nycvar")
+	if ($mid =~ /\w{9,}/) {
+		my @long = ($mid =~ m!(\w{3,})!g);
+		push(@try, join(' ', map { "m:$_" } @long));
+
+		# is the last element long enough to not trigger excessive
+		# wildcard matches?
+		if (length($long[-1]) > 8) {
+			$long[-1] .= '*';
+			push(@try, join(' ', map { "m:$_" } @long));
+		}
+	}
+
+	foreach my $m (@try) {
+		my $mset = eval { $srch->query($m, $opt) };
+		if (my $err = $@) {
+			# Xapian cannot handle this wildcard because it has
+			# too many results; fall through to the next query.
+			next if ref($err) eq 'Search::Xapian::QueryParserError';
+
+			# Any other failure is unexpected: rethrow the real
+			# error instead of dying below on an undefined $mset
+			# with a misleading "Can't call method" message.
+			die $err;
+		}
+		my @mids = map {
+			my $doc = $_->get_document;
+			PublicInbox::SearchMsg->load_doc($doc)->mid;
+		} $mset->items;
+		return \@mids if scalar(@mids);
+	}
+
+	# Explicit "no match": the value of a sub which ends in a foreach
+	# loop is unspecified, and callers test the result for truth.
+	return;
+}
+
sub ext_msg {
my ($ctx) = @_;
my $cur = $ctx->{-inbox};
return exact($ctx, \@found, $mid) if @found;
# fall back to partial MID matching
- my $n_partial = 0;
my @partial;
-
- if (my $mm = $cur->mm) {
- my $tmp_mid = $mid;
- my $res = $mm->mid_prefixes($tmp_mid, 100);
- if ($res && scalar(@$res)) {
- $n_partial += scalar(@$res);
- push @partial, [ $cur, $res ];
- # fixup common errors:
- } elsif ($tmp_mid =~ s,/[tTf],,) {
- $res = $mm->mid_prefixes($tmp_mid, 100);
- if ($res && scalar(@$res)) {
- $n_partial += scalar(@$res);
- push @partial, [ $cur, $res ];
- }
- }
+ my $n_partial = 0;
+	my $srch = $cur->search;
+	# Do not combine "my" with a statement modifier: when the condition is
+	# false the lexical is left in an undefined state and may retain a
+	# value from a previous call.  Always initialize it explicitly.
+	my $mids = $srch ? search_partial($srch, $mid) : undef;
+ if ($mids) {
+ $n_partial = scalar(@$mids);
+ push @partial, [ $cur, $mids ];
}
# can't find a partial match in current inbox, try the others:
if (!$n_partial && length($mid) >= 16) {
- my $tmp_mid = $mid;
-again:
foreach my $ibx (@ibx) {
- my $mm = $ibx->mm or next;
- my $res = $mm->mid_prefixes($tmp_mid, 100);
- if ($res && scalar(@$res)) {
- $n_partial += scalar(@$res);
- push @partial, [ $ibx, $res ];
- last if $n_partial >= 100;
- }
- }
- # fixup common errors:
- if (!$n_partial && $tmp_mid =~ s,/[tTf],,) {
- goto again;
+ $srch = $ibx->search or next;
+ $mids = search_partial($srch, $mid) or next;
+ $n_partial += scalar(@$mids);
+ push @partial, [ $ibx, $mids];
+ last if $n_partial >= PARTIAL_MAX;
}
}
if ($n_partial) {
$code = 300;
my $es = $n_partial == 1 ? '' : 'es';
+ $n_partial .= '+' if ($n_partial == PARTIAL_MAX);
$s .= "\n$n_partial partial match$es found:\n\n";
my $cur_name = $cur->{name};
foreach my $pair (@partial) {
ReadOnly => !$writable,
sqlite_use_immediate_transaction => 1,
});
- $dbh->do('PRAGMA case_sensitive_like = ON');
$dbh;
}
($min, $sth->fetchrow_array);
}
-sub mid_prefixes {
- my ($self, $pfx, $limit) = @_;
-
- die "No prefix given" unless (defined $pfx && $pfx ne '');
- $pfx =~ s/([%_])/\\$1/g;
- $pfx .= '%';
-
- $limit ||= 100;
- $limit += 0; # force to integer
- $limit ||= 100;
-
- $self->{dbh}->selectcol_arrayref('SELECT mid FROM msgmap ' .
- 'WHERE mid LIKE ? ESCAPE ? ' .
- "ORDER BY num DESC LIMIT $limit",
- undef, $pfx, '\\');
-}
-
sub mid_delete {
my ($self, $mid) = @_;
my $dbh = $self->{dbh};
$qp->set_database($self->{xdb});
$qp->set_stemmer($self->stemmer);
$qp->set_stemming_strategy(STEM_SOME);
+ $qp->set_max_wildcard_expansion(100);
$qp->add_valuerangeprocessor(
Search::Xapian::NumberValueRangeProcessor->new(YYYYMMDD, 'd:'));
$qp->add_valuerangeprocessor(
foreach my $mid (@$mids) {
$tg->index_text($mid, 1, 'XM');
+
+ # also index the words (3+ characters) of long Message-IDs on
+ # their own, because too many Message-IDs are prefixed with
+ # "Pine.LNX."...
+ if ($mid =~ /\w{12,}/) {
+ my @long = ($mid =~ /(\w{3,}+)/g);
+ $tg->index_text(join(' ', @long), 1, 'XM');
+ }
$tg->increase_termpos;
}
$smsg->{to} = $smsg->{cc} = '';
$res = cgi_run("/test/blahblah\@example.com/raw");
like($res->{body}, qr/Message-Id: <blahblah\@example\.com>/,
"mid raw hit");
- $res = cgi_run("/test/blahblah\@example.con/raw");
- like($res->{head}, qr/Status: 300 Multiple Choices/, "mid raw miss");
$res = cgi_run("/test/blahblah\@example.com/");
like($res->{body}, qr/\A<html>/, "mid html hit");
like($res->{head}, qr/Status: 200 OK/, "200 response");
- $res = cgi_run("/test/blahblah\@example.con/");
- like($res->{head}, qr/Status: 300 Multiple Choices/, "mid html miss");
$res = cgi_run("/test/blahblah\@example.com/f/");
like($res->{head}, qr/Status: 301 Moved/, "301 response");
like($res->{head},
qr!^Location: http://[^/]+/test/blahblah\@example\.com/\r\n!ms,
'301 redirect location');
- $res = cgi_run("/test/blahblah\@example.con/");
- like($res->{head}, qr/Status: 300 Multiple Choices/, "mid html miss");
$res = cgi_run("/test/new.html");
like($res->{body}, qr/slashy%2Fasdf\@example\.com/,
is($d->num_for($mid), $mid2num{$mid}, "mid:$mid maps correctly");
}
-is_deeply($d->mid_prefixes('a'), [qw(aa@cc aa@bb a@b)], "mid_prefixes match");
-is_deeply($d->mid_prefixes('A'), [], "mid_prefixes is case sensitive");
-
is(undef, $d->last_commit, "last commit not set");
my $lc = 'deadbeef' x 5;
is(undef, $d->last_commit($lc), 'previous last commit (undef) returned');
is(0, system(qw(git init -q --bare), $git_dir), "git init (main)");
my $rw = PublicInbox::SearchIdx->new($git_dir, 1);
ok($rw, "search indexer created");
-my $data = <<'EOF';
+my $digits = '10010260936330';
+my $ua = 'Pine.LNX.4.10';
+my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com";
+my $data = <<"EOF";
Subject: test
-Message-Id: <utf8@example>
-From: Ævar Arnfjörð Bjarmason <avarab@example>
-To: git@vger.kernel.org
+Message-ID: <$mid>
+From: Ævar Arnfjörð Bjarmason <avarab\@example>
+To: git\@vger.kernel.org
EOF
my $mime = Email::MIME->new(\$_);
my $bytes = bytes::length($mime->as_string);
my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored');
- my $mid = $mime->header('Message-Id');
- ok($doc_id, 'message added: '. $mid);
+ ok($doc_id, 'message added');
}
$rw->commit_txn_lazy;
$res = $cb->(POST('/test/?q=s:bogus&x=m'));
is($res->code, 404, 'failed search result gives 404');
is_deeply([], $warn, 'no warnings');
+
+ my $mid_re = qr/\Q$mid\E/o;
+ while (length($digits) > 8) {
+ $res = $cb->(GET("/test/$ua.$digits/"));
+ is($res->code, 300, 'partial match found while truncated');
+ like($res->content, qr/\b1 partial match found\b/);
+ like($res->content, $mid_re, 'found mid in response');
+ chop($digits);
+ }
});
done_testing();
"sharedRepository respected for $bn");
}
+# A long numeric component of a Message-ID must be searchable on its own:
+# exactly, as a wildcard prefix once truncated, and alongside the other
+# words of the Message-ID.
+$ibx->with_umask(sub {
+	$rw_commit->();
+	my $digits = '10010260936330';
+	my $ua = 'Pine.LNX.4.10';
+	my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com";
+
+	# nothing indexed yet, so the digits alone must not match
+	is($ro->reopen->query("m:$digits", { mset => 1})->size, 0,
+		'no results yet');
+	my $pine = Email::MIME->create(
+		header_str => [
+			Subject => 'blah',
+			'Message-ID' => "<$mid>",
+			From => 'torvalds@transmeta',
+			To => 'list@example.com',
+		],
+		body => ""
+	);
+	my $x = $rw->add_message($pine);
+	$rw->commit_txn_lazy;
+	is($ro->reopen->query("m:$digits", { mset => 1})->size, 1,
+		'searching only digit yielded result');
+
+	# drop one trailing digit at a time and rely on the wildcard
+	my $wild = $digits;
+	for my $i (1..6) {
+		chop($wild);
+		is($ro->query("m:$wild*", { mset => 1})->size, 1,
+			"searching chopped($i) digit yielded result $wild ");
+	}
+
+	# individual words of the Message-ID combined in one query
+	is($ro->query("m:Pine m:LNX m:$digits", {mset=>1})->size, 1,
+		'searching individual words of Message-ID yielded result');
+});
+
done_testing();
1;