lib/PublicInbox/Search.pm | 9 +++++++++ lib/PublicInbox/SearchIdx.pm | 6 ++++++ t/search.t | 31 +++++++++++++++++++++++++++++++ diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 86a6ad674b3e1fbfdb70d3a04d8dac40d4fae881..b7db2b9f7fcc91aacc317ef3e439b6349934083c 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -77,11 +77,17 @@ # 14 - fix ghost root vivification # 15 - see public-inbox-v2-format(5) # further bumps likely unnecessary, we'll suggest in-place # "--reindex" use for further fixes and tweaks + # + # public-inbox v1.5.0 adds (still SCHEMA_VERSION=15): + # * "lid:" and "l:" for List-Id searches SCHEMA_VERSION => 15, }; +# note: the non-X term prefix allocations are shared with +# Xapian omega, see xapian-applications/omega/docs/termprefixes.rst my %bool_pfx_external = ( mid => 'Q', # Message-ID (full/exact), this is mostly uniQue + lid => 'G', # newsGroup (or similar entity), just inside <> dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', @@ -92,6 +98,7 @@ my %prob_prefix = ( # for mairix compatibility s => 'S', m => 'XM', # 'mid:' (bool) is exact, 'm:' (prob) can do partial + l => 'XL', # 'lid:' (bool) is exact, 'l:' (prob) can do partial f => 'A', t => 'XTO', tc => 'XTO XCC', @@ -134,6 +141,8 @@ 'c:' => 'match within the Cc header', 'f:' => 'match within the From header', 'a:' => 'match within the To, Cc, and From headers', 'tc:' => 'match within the To and Cc headers', + 'lid:' => 'exact contents of the List-Id', + 'l:' => 'partial match contents of the List-Id header', 'bs:' => 'match within the Subject and body', 'dfn:' => 'match filename from diff', 'dfa:' => 'match diff removed (-) lines', diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 25118f436139baab6fbfd5c3df13700e92e2241f..998341a7d4d5bfa069f02e6beca5e75ad04def74 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -352,6 +352,12 @@ } } } $doc->add_boolean_term('Q' . 
$_) foreach @$mids; + for my $l ($hdr->header_raw('List-Id')) { + $l =~ /<([^>]+)>/ or next; + my $lid = $1; + $doc->add_boolean_term('G' . $lid); + index_text($self, $lid, 1, 'XL'); # probabilistic + } $self->{xdb}->replace_document($smsg->{num}, $doc); } diff --git a/t/search.t b/t/search.t index 83986837eaf9190e8c98aa21115dfeb2db19c38a..92f3305d556c5773fadc1e64f6e9f544904ff06f 100644 --- a/t/search.t +++ b/t/search.t @@ -66,6 +66,7 @@ Subject: Hello world Message-ID: From: John Smith To: list@example.com +List-Id: I'm not mad \m/ EOF @@ -77,6 +78,7 @@ Message-ID: From: John Smith To: list@example.com Cc: foo@example.com +List-Id: there's nothing goodbye forever :< EOF @@ -447,6 +449,35 @@ "searching chopped($i) digit yielded result $wild "); } is($ro->query("m:Pine m:LNX m:10010260936330", {mset=>1})->size, 1); }); + +{ # List-Id searching + my $found = $ro->query('lid:i.m.just.bored'); + is_deeply([ filter_mids($found) ], [ 'root@s' ], + 'got expected mid on exact lid: search'); + + $found = $ro->query('lid:just.bored'); + is_deeply($found, [], 'got nothing on lid: search'); + + $found = $ro->query('lid:*.just.bored'); + is_deeply($found, [], 'got nothing on wildcard lid: search'); + + $found = $ro->query('l:i.m.just.bored'); + is_deeply([ filter_mids($found) ], [ 'root@s' ], + 'probabilistic search works on full List-Id contents'); + + $found = $ro->query('l:just.bored'); + is_deeply([ filter_mids($found) ], [ 'root@s' ], + 'probabilistic search works on partial List-Id contents'); + + $found = $ro->query('lid:mad'); + is_deeply($found, [], 'no match on phrase with lid:'); + + $found = $ro->query('lid:bored'); + is_deeply($found, [], 'no match on partial List-Id with lid:'); + + $found = $ro->query('l:nothing'); + is_deeply($found, [], 'no match on phrase with l:'); +} done_testing();