Sergey Matveev's repositories - public-inbox.git/commitdiff
search: more granular message body searching
author Eric Wong <e@80x24.org>
Fri, 9 Sep 2016 00:01:24 +0000 (00:01 +0000)
committer Eric Wong <e@80x24.org>
Fri, 9 Sep 2016 00:02:18 +0000 (00:02 +0000)
"bs:" and "b:" are adapted from mairix(1)

We will also support searching explicitly for quoted vs
non-quoted text via "q:" and "nq:" prefixes since sometimes
readers will not care for quoted text.

In the future, we will support parsing diffs (perhaps when
repobrowse integration is complete).

Note: this roughly doubles the size of the Xapian database due
to the additional information; so this change may not be worth
it.

lib/PublicInbox/Search.pm
lib/PublicInbox/SearchIdx.pm
t/search.t

index 3b25b6625005d646dcd422ba444bfc1ae6295947..f74129d5da1c382fa75dd38f8ca634916349af81 100644 (file)
@@ -58,16 +58,22 @@ my %bool_pfx_external = (
 );
 
 my %prob_prefix = (
-       s => 'S', # for mairix compatibility
+       # for mairix compatibility
+       s => 'S',
        m => 'Q', # 'mid' is exact, 'm' can do partial
-       f => 'A', # for mairix compatibility
-       t => 'XTO', # for mairix compatibility
-       tc => 'XTC', # for mairix compatibility
-       c => 'XCC', # for mairix compatibility
-       tcf => 'XTCF', # for mairix compatibility
+       f => 'A',
+       t => 'XTO',
+       tc => 'XTC',
+       c => 'XCC',
+       tcf => 'XTCF',
+       b => 'XBODY',
+       bs => 'XBS',
+
        # n.b.: leaving out "a:" alias for "tcf:" even though
        # mairix supports it.  It is only mentioned in passing in mairix(1)
        # and the extra two letters are not significantly longer.
+       q => 'XQUOT',
+       nq => 'XNQ',
 );
 
 # not documenting m: and mid: for now, the using the URLs works w/o Xapian
index 37fefbeaff2a05ef5a225475fbbabd4e651adbb0..cd27a29459e2360d1607cc7267d910af3aa8acb4 100644 (file)
@@ -173,7 +173,10 @@ sub add_message {
                my $tg = $self->term_generator;
 
                $tg->set_document($doc);
-               $tg->index_text($subj, 1, 'S') if $subj;
+               if ($subj) {
+                       $tg->index_text($subj, 1, 'S');
+                       $tg->index_text($subj, 1, 'XBS');
+               }
                $tg->increase_termpos;
                $tg->index_text($subj) if $subj;
                $tg->increase_termpos;
@@ -199,13 +202,21 @@ sub add_message {
                                }
                        }
                        if (@quot) {
-                               $tg->index_text(join("\n", @quot), 0);
+                               my $s = join("\n", @quot);
                                @quot = ();
+                               $tg->index_text($s, 1, 'XQUOT');
+                               $tg->index_text($s, 0, 'XBS');
+                               $tg->index_text($s, 0, 'XBODY');
+                               $tg->index_text($s, 0);
                                $tg->increase_termpos;
                        }
                        if (@orig) {
-                               $tg->index_text(join("\n", @orig));
+                               my $s = join("\n", @orig);
                                @orig = ();
+                               $tg->index_text($s, 1, 'XNQ');
+                               $tg->index_text($s, 1, 'XBS');
+                               $tg->index_text($s, 1, 'XBODY');
+                               $tg->index_text($s);
                                $tg->increase_termpos;
                        }
                });
index 7abaf8321530c90c054ce58c1d687036ff4b31c2..bddb545a59b0a682ffb5f40c59cd46cb8811f708 100644 (file)
@@ -361,6 +361,31 @@ sub filter_mids {
        }
 }
 
+{
+       $rw_commit->();
+       $ro->reopen;
+       my $res = $ro->query('b:hello');
+       is(scalar @{$res->{msgs}}, 0, 'no match on body search only');
+       $res = $ro->query('bs:smith');
+       is(scalar @{$res->{msgs}}, 0,
+               'no match on body+subject search for From');
+
+       $res = $ro->query('q:theatre');
+       is(scalar @{$res->{msgs}}, 1, 'only one quoted body');
+       like($res->{msgs}->[0]->from, qr/\AQuoter/, 'got quoted body');
+
+       $res = $ro->query('nq:theatre');
+       is(scalar @{$res->{msgs}}, 1, 'only one non-quoted body');
+       like($res->{msgs}->[0]->from, qr/\ANon-Quoter/, 'got non-quoted body');
+
+       foreach my $pfx (qw(b: bs:)) {
+               $res = $ro->query($pfx . 'theatre');
+               is(scalar @{$res->{msgs}}, 2, "searched both bodies for $pfx");
+               like($res->{msgs}->[0]->from, qr/\ANon-Quoter/,
+                       "non-quoter first for $pfx");
+       }
+}
+
 done_testing();
 
 1;