Sergey Matveev's repositories - public-inbox.git/blobdiff - t/search.t
search: support searching on List-Id
[public-inbox.git] / t / search.t
index 8508f27301443d28d39a57bd54ed86cd0a38300a..92f3305d556c5773fadc1e64f6e9f544904ff06f 100644 (file)
@@ -8,7 +8,7 @@ require_mods(qw(DBD::SQLite Search::Xapian));
 require PublicInbox::SearchIdx;
 require PublicInbox::Inbox;
 require PublicInbox::InboxWritable;
-use Email::MIME;
+use PublicInbox::MIME;
 my ($tmpdir, $for_destroy) = tmpdir();
 my $git_dir = "$tmpdir/a.git";
 my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir });
@@ -60,27 +60,28 @@ sub oct_is ($$$) {
 }
 
 $ibx->with_umask(sub {
-       my $root = Email::MIME->create(
-               header_str => [
-                       Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
-                       Subject => 'Hello world',
-                       'Message-ID' => '<root@s>',
-                       From => 'John Smith <js@example.com>',
-                       To => 'list@example.com',
-               ],
-               body => "\\m/\n");
-       my $last = Email::MIME->create(
-               header_str => [
-                       Date => 'Sat, 02 Oct 2010 00:00:00 +0000',
-                       Subject => 'Re: Hello world',
-                       'In-Reply-To' => '<root@s>',
-                       'Message-ID' => '<last@s>',
-                       From => 'John Smith <js@example.com>',
-                       To => 'list@example.com',
-                       Cc => 'foo@example.com',
-               ],
-               body => "goodbye forever :<\n");
-
+       my $root = PublicInbox::MIME->new(<<'EOF');
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Subject: Hello world
+Message-ID: <root@s>
+From: John Smith <js@example.com>
+To: list@example.com
+List-Id: I'm not mad <i.m.just.bored>
+
+\m/
+EOF
+       my $last = PublicInbox::MIME->new(<<'EOF');
+Date: Sat, 02 Oct 2010 00:00:00 +0000
+Subject: Re: Hello world
+In-Reply-To: <root@s>
+Message-ID: <last@s>
+From: John Smith <js@example.com>
+To: list@example.com
+Cc: foo@example.com
+List-Id: there's nothing <left.for.me.to.do>
+
+goodbye forever :<
+EOF
        my $rv;
        $rw_commit->();
        $root_id = $rw->add_message($root);
@@ -127,31 +128,29 @@ sub filter_mids {
 $ibx->with_umask(sub {
        $rw_commit->();
        my $rmid = '<ghost-message@s>';
-       my $reply_to_ghost = Email::MIME->create(
-               header_str => [
-                       Date => 'Sat, 02 Oct 2010 00:00:00 +0000',
-                       Subject => 'Re: ghosts',
-                       'Message-ID' => '<ghost-reply@s>',
-                       'In-Reply-To' => $rmid,
-                       From => 'Time Traveler <tt@example.com>',
-                       To => 'list@example.com',
-               ],
-               body => "-_-\n");
-
+       my $reply_to_ghost = PublicInbox::MIME->new(<<"EOF");
+Date: Sat, 02 Oct 2010 00:00:00 +0000
+Subject: Re: ghosts
+Message-ID: <ghost-reply\@s>
+In-Reply-To: $rmid
+From: Time Traveler <tt\@example.com>
+To: list\@example.com
+
+-_-
+EOF
        my $rv;
        my $reply_id = $rw->add_message($reply_to_ghost);
        is($reply_id, int($reply_id), "reply_id is an integer: $reply_id");
 
-       my $was_ghost = Email::MIME->create(
-               header_str => [
-                       Date => 'Sat, 02 Oct 2010 00:00:01 +0000',
-                       Subject => 'ghosts',
-                       'Message-ID' => $rmid,
-                       From => 'Laggy Sender <lag@example.com>',
-                       To => 'list@example.com',
-               ],
-               body => "are real\n");
+       my $was_ghost = PublicInbox::MIME->new(<<"EOF");
+Date: Sat, 02 Oct 2010 00:00:01 +0000
+Subject: ghosts
+Message-ID: $rmid
+From: Laggy Sender <lag\@example.com>
+To: list\@example.com
 
+are real
+EOF
        my $ghost_id = $rw->add_message($was_ghost);
        is($ghost_id, int($ghost_id), "ghost_id is an integer: $ghost_id");
        my $msgs = $rw->{over}->get_thread('ghost-message@s');
@@ -192,18 +191,17 @@ $ibx->with_umask(sub {
        $rw_commit->();
        $ro->reopen;
        my $long_mid = 'last' . ('x' x 60). '@s';
-
-       my $long = Email::MIME->create(
-               header_str => [
-                       Date => 'Sat, 02 Oct 2010 00:00:00 +0000',
-                       Subject => 'long message ID',
-                       'References' => '<root@s> <last@s>',
-                       'In-Reply-To' => '<last@s>',
-                       'Message-ID' => "<$long_mid>",
-                       From => '"Long I.D." <long-id@example.com>',
-                       To => 'list@example.com',
-               ],
-               body => "wut\n");
+       my $long = PublicInbox::MIME->new(<<EOF); # reply to <last@s> carrying the long Message-ID
+Date: Sat, 02 Oct 2010 00:00:00 +0000
+Subject: long message ID
+References: <root\@s> <last\@s>
+In-Reply-To: <last\@s>
+Message-ID: <$long_mid>
+From: "Long I.D." <long-id\@example.com>
+To: list\@example.com
+
+wut
+EOF
        my $long_id = $rw->add_message($long);
        is($long_id, int($long_id), "long_id is an integer: $long_id");
 
@@ -213,18 +211,16 @@ $ibx->with_umask(sub {
        my @res;
 
        my $long_reply_mid = 'reply-to-long@1';
-       my $long_reply = Email::MIME->create(
-               header_str => [
-                       Subject => 'I break references',
-                       Date => 'Sat, 02 Oct 2010 00:00:00 +0000',
-                       'Message-ID' => "<$long_reply_mid>",
-                       # No References:
-                       # 'References' => '<root@s> <last@s> <'.$long_mid.'>',
-                       'In-Reply-To' => "<$long_mid>",
-                       From => 'no1 <no1@example.com>',
-                       To => 'list@example.com',
-               ],
-               body => "no References\n");
+       my $long_reply = PublicInbox::MIME->new(<<EOF);
+Subject: I break references
+Date: Sat, 02 Oct 2010 00:00:00 +0000
+Message-ID: <$long_reply_mid>
+In-Reply-To: <$long_mid>
+From: no1 <no1\@example.com>
+To: list\@example.com
+
+no References
+EOF
        ok($rw->add_message($long_reply) > $long_id, "inserted long reply");
 
        $rw_commit->();
@@ -239,25 +235,26 @@ $ibx->with_umask(sub {
 # quote prioritization
 $ibx->with_umask(sub {
        $rw_commit->();
-       $rw->add_message(Email::MIME->create(
-               header_str => [
-                       Date => 'Sat, 02 Oct 2010 00:00:01 +0000',
-                       Subject => 'Hello',
-                       'Message-ID' => '<quote@a>',
-                       From => 'Quoter <quoter@example.com>',
-                       To => 'list@example.com',
-               ],
-               body => "> theatre illusions\nfade\n"));
-
-       $rw->add_message(Email::MIME->create(
-               header_str => [
-                       Date => 'Sat, 02 Oct 2010 00:00:02 +0000',
-                       Subject => 'Hello',
-                       'Message-ID' => '<nquote@a>',
-                       From => 'Non-Quoter<non-quoter@example.com>',
-                       To => 'list@example.com',
-               ],
-               body => "theatre\nfade\n"));
+       $rw->add_message(PublicInbox::MIME->new(<<'EOF'));
+Date: Sat, 02 Oct 2010 00:00:01 +0000
+Subject: Hello
+Message-ID: <quote@a>
+From: Quoter <quoter@example.com>
+To: list@example.com
+
+> theatre illusions
+fade
+EOF
+       $rw->add_message(PublicInbox::MIME->new(<<'EOF'));
+Date: Sat, 02 Oct 2010 00:00:02 +0000
+Subject: Hello
+Message-ID: <nquote@a>
+From: Non-Quoter<non-quoter@example.com>
+To: list@example.com
+
+theatre
+fade
+EOF
        my $res = $rw->query("theatre");
        is(scalar(@$res), 2, "got both matches");
        is($res->[0]->mid, 'nquote@a', "non-quoted scores higher") if scalar(@$res);
@@ -272,17 +269,17 @@ $ibx->with_umask(sub {
 # circular references
 $ibx->with_umask(sub {
        my $s = 'foo://'. ('Circle' x 15).'/foo';
-       my $doc_id = $rw->add_message(Email::MIME->create(
-               header => [ Subject => $s ],
-               header_str => [
-                       Date => 'Sat, 02 Oct 2010 00:00:01 +0000',
-                       'Message-ID' => '<circle@a>',
-                       'References' => '<circle@a>',
-                       'In-Reply-To' => '<circle@a>',
-                       From => 'Circle <circle@example.com>',
-                       To => 'list@example.com',
-               ],
-               body => "LOOP!\n"));
+       my $doc_id = $rw->add_message(PublicInbox::MIME->new(<<EOF));
+Subject: $s
+Date: Sat, 02 Oct 2010 00:00:01 +0000
+Message-ID: <circle\@a>
+References: <circle\@a>
+In-Reply-To: <circle\@a>
+From: Circle <circle\@example.com>
+To: list\@example.com
+
+LOOP!
+EOF
        ok($doc_id > 0, "doc_id defined with circular reference");
        my $smsg = $rw->query('m:circle@a', {limit=>1})->[0];
        is(defined($smsg), 1, 'found m:circl@a');
@@ -291,9 +288,7 @@ $ibx->with_umask(sub {
 });
 
 $ibx->with_umask(sub {
-       my $eml = 't/utf8.eml';
-       my $mime = PublicInbox::InboxWritable::mime_from_path($eml) or
-               die "open $eml: $!";
+       my $mime = mime_load 't/utf8.eml';
        my $doc_id = $rw->add_message($mime);
        ok($doc_id > 0, 'message indexed doc_id with UTF-8');
        my $msg = $rw->query('m:testmessage@example.com', {limit => 1})->[0];
@@ -376,35 +371,7 @@ $ibx->with_umask(sub {
 }
 
 $ibx->with_umask(sub {
-       my $part1 = Email::MIME->create(
-                 attributes => {
-                     content_type => 'text/plain',
-                     disposition  => 'attachment',
-                     charset => 'US-ASCII',
-                    encoding => 'quoted-printable',
-                    filename => 'attached_fart.txt',
-                 },
-                 body_str => 'inside the attachment',
-       );
-       my $part2 = Email::MIME->create(
-                 attributes => {
-                     content_type => 'text/plain',
-                     disposition  => 'attachment',
-                     charset => 'US-ASCII',
-                    encoding => 'quoted-printable',
-                    filename => 'part_deux.txt',
-                 },
-                 body_str => 'inside another',
-       );
-       my $amsg = Email::MIME->create(
-               header_str => [
-                       Subject => 'see attachment',
-                       'Message-ID' => '<file@attached>',
-                       From => 'John Smith <js@example.com>',
-                       To => 'list@example.com',
-               ],
-               parts => [ $part1, $part2 ],
-       );
+       my $amsg = mime_load 't/search-amsg.eml';
        ok($rw->add_message($amsg), 'added attachment');
        $rw_commit->();
        $ro->reopen;
@@ -462,15 +429,13 @@ $ibx->with_umask(sub {
        my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com";
        is($ro->reopen->query("m:$digits", { mset => 1})->size, 0,
                'no results yet');
-       my $pine = Email::MIME->create(
-               header_str => [
-                       Subject => 'blah',
-                       'Message-ID' => "<$mid>",
-                       From => 'torvalds@transmeta',
-                       To => 'list@example.com',
-               ],
-               body => ""
-       );
+       my $pine = PublicInbox::MIME->new(<<EOF);
+Subject: blah
+Message-ID: <$mid>
+From: torvalds\@transmeta
+To: list\@example.com
+
+EOF
        my $x = $rw->add_message($pine);
        $rw->commit_txn_lazy;
        is($ro->reopen->query("m:$digits", { mset => 1})->size, 1,
@@ -485,6 +450,35 @@ $ibx->with_umask(sub {
        is($ro->query("m:Pine m:LNX m:10010260936330", {mset=>1})->size, 1);
 });
 
+{ # List-Id searching: "lid:" is asserted as exact-only, "l:" as probabilistic
+       my $found = $ro->query('lid:i.m.just.bored');
+       is_deeply([ filter_mids($found) ], [ 'root@s' ],
+               'got expected mid on exact lid: search');
+
+       $found = $ro->query('lid:just.bored');
+       is_deeply($found, [], 'no match on List-Id suffix with lid:');
+
+       $found = $ro->query('lid:*.just.bored');
+       is_deeply($found, [], 'no match on leading wildcard with lid:');
+
+       $found = $ro->query('l:i.m.just.bored');
+       is_deeply([ filter_mids($found) ], [ 'root@s' ],
+               'probabilistic search works on full List-Id contents');
+
+       $found = $ro->query('l:just.bored');
+       is_deeply([ filter_mids($found) ], [ 'root@s' ],
+               'probabilistic search works on partial List-Id contents');
+
+       $found = $ro->query('lid:mad'); # "mad" is only in the display phrase of root@s
+       is_deeply($found, [], 'no match on phrase with lid:');
+
+       $found = $ro->query('lid:bored');
+       is_deeply($found, [], 'no match on partial List-Id with lid:');
+
+       $found = $ro->query('l:nothing'); # "nothing" is only in the display phrase of last@s
+       is_deeply($found, [], 'no match on phrase with l:');
+}
+
 done_testing();
 
 1;