+# Message-ID search on a Pine-style Message-ID: the long digit run in the
+# middle of the Message-ID must be findable on its own via the "m:"
+# prefix, both exactly and with a trailing wildcard.
+$ibx->with_umask(sub {
+ $rw_commit->();
+ my $digits = '10010260936330';
+ my $ua = 'Pine.LNX.4.10';
+ my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com";
+ # nothing is indexed under these digits before the message is added
+ is($ibx->search->mset("m:$digits")->size, 0, 'no results yet');
+ my $pine = PublicInbox::Eml->new(<<EOF);
+Subject: blah
+Message-ID: <$mid>
+From: torvalds\@transmeta
+To: list\@example.com
+
+EOF
+ my $x = $rw->add_message($pine);
+ $rw->commit_txn_lazy;
+ $ibx->search->reopen;
+ is($ibx->search->mset("m:$digits")->size, 1,
+ 'searching only digit yielded result');
+
+ # drop one trailing digit per iteration and search by prefix wildcard
+ my $wild = $digits;
+ for my $i (1..6) {
+ chop($wild);
+ is($ibx->search->mset("m:$wild*")->size, 1,
+ "searching chopped($i) digit yielded result $wild ");
+ }
+ # several m: terms taken from the same Message-ID still match once
+ is($ibx->search->mset("m:Pine m:LNX m:$digits")->size, 1,
+ 'multiple m: terms from one Message-ID yielded result');
+});
+
+# "lid:" only matches the complete List-Id value, while "l:" also
+# matches on parts of it; both are checked against the message whose
+# Message-ID is 'root@s'.
+{ # List-Id searching
+ my $found = $query->('lid:i.m.just.bored');
+ is_deeply([ filter_mids($found) ], [ 'root@s' ],
+ 'got expected mid on exact lid: search');
+
+ $found = $query->('lid:just.bored');
+ is_deeply($found, [], 'got nothing on partial lid: search');
+
+ $found = $query->('lid:*.just.bored');
+ is_deeply($found, [], 'got nothing on leading-wildcard lid: search');
+
+ $found = $query->('l:i.m.just.bored');
+ is_deeply([ filter_mids($found) ], [ 'root@s' ],
+ 'probabilistic search works on full List-Id contents');
+
+ $found = $query->('l:just.bored');
+ is_deeply([ filter_mids($found) ], [ 'root@s' ],
+ 'probabilistic search works on partial List-Id contents');
+
+ $found = $query->('lid:mad');
+ is_deeply($found, [], 'no match on phrase with lid:');
+
+ $found = $query->('lid:bored');
+ is_deeply($found, [], 'no match on partial List-Id with lid:');
+
+ # an empty result is expected here, so the description must say so
+ $found = $query->('l:nothing');
+ is_deeply($found, [], 'no match on l:nothing');
+}
+
+# Index a message that embeds other messages (message/rfc822) and a git
+# binary patch, then check which parts of each are searchable.
+$ibx->with_umask(sub {
+ $rw_commit->();
+ # Message-ID every embedded-message query below is expected to return
+ my $outer_mid = '20200418222508.GA13918@dcvr';
+ my $embed_id = $rw->add_message(eml_load('t/data/message_embed.eml'));
+ ok($embed_id > 0, 'messages within messages');
+ $rw->commit_txn_lazy;
+ $ibx->search->reopen;
+
+ # both attachment filenames must lead to the same single result
+ my $by_test_eml = $query->('n:test.eml');
+ is(scalar(@$by_test_eml), 1, 'got a result');
+ my $by_embed2x_eml = $query->('n:embed2x.eml');
+ is_deeply($by_test_eml, $by_embed2x_eml, '.eml filenames searchable');
+
+ my @mids = ($outer_mid, '20200418222020.GA2745@dcvr',
+ '20200418214114.7575-1-e@yhbt.net');
+ for my $m (@mids) {
+ my $prob = $query->("m:$m");
+ is($prob->[0]->{mid}, $outer_mid, 'probabilistic m:'.$m);
+ my $bool = $query->("mid:$m");
+ is($bool->[0]->{mid}, $outer_mid, 'boolean mid:'.$m);
+ }
+ my $by_dfpost = $query->('dfpost:4dc62c50');
+ is($by_dfpost->[0]->{mid}, $outer_mid,
+ 'diff search reaches inside message/rfc822');
+ my $by_subject = $query->('s:"mail header experiments"');
+ is($by_subject->[0]->{mid}, $outer_mid,
+ 'Subject search reaches inside message/rfc822');
+
+ # binary patch: the header and "literal N" line are searchable,
+ # the encoded payload is not
+ my $patch_id = $rw->add_message(eml_load('t/data/binary.patch'));
+ $rw->commit_txn_lazy;
+ $ibx->search->reopen;
+ my $hits = $query->('HcmV');
+ is_deeply($hits, [], 'no results against trailer');
+ $hits = $query->('IcmZPo000310RR91');
+ is_deeply($hits, [], 'no results against 1-byte binary patch');
+ $hits = $query->('"GIT binary patch"');
+ is(scalar(@$hits), 1, 'got binary result from "GIT binary patch"');
+ is($hits->[0]->{mid}, 'binary-patch-test@example', 'msgid for binary');
+ my $by_size = $query->('"literal 1"');
+ is_deeply($by_size, $hits, 'got binary result from exact literal size');
+ $by_size = $query->('"literal 2"');
+ is_deeply($by_size, [], 'no results for wrong size');
+});
+
+# Query-string rewriting: query_argv_to_string() quotes argv elements
+# containing spaces, and both it and query_approxidate() expand
+# d:/dt:/rt: date expressions while leaving quoted phrases untouched.
+SKIP: {
+ my ($s, $g) = ($ibx->search, $ibx->git);
+ my $q = $s->query_argv_to_string($g, ["quoted phrase"]);
+ is($q, q["quoted phrase"], 'quoted phrase');
+ $q = $s->query_argv_to_string($g, ['s:pa ce']);
+ is($q, q[s:"pa ce"], 'space with prefix');
+ $q = $s->query_argv_to_string($g, ["\(s:pa ce", "AND", "foo\)"]);
+ is($q, q[(s:"pa ce" AND foo)], 'space AND foo');
+
+ # relative dates ("yesterday", "5.days.ago") are compared against
+ # strftime() output computed in this block, so do not run across a
+ # UTC day boundary
+ local $ENV{TZ} = 'UTC';
+ my $now = strftime('%H:%M:%S', gmtime(time));
+ if ($now =~ /\A23:(?:59|60)/ || $now =~ /\A00:00:0[01]\z/) {
+ # NOTE(review): the count (6) is lower than the number of
+ # assertions that follow; harmless only while the script
+ # has no fixed plan -- confirm
+ skip 'too close to midnight, time is tricky', 6;
+ }
+ $q = $s->query_argv_to_string($g, [qw(d:20101002 blah)]);
+ is($q, 'd:20101002..20101003 blah', 'YYYYMMDD expanded to range');
+ $q = $s->query_argv_to_string($g, [qw(d:2010-10-02)]);
+ is($q, 'd:20101002..20101003', 'YYYY-MM-DD expanded to range');
+ $q = $s->query_argv_to_string($g, [qw(rt:2010-10-02.. yy)]);
+ $q =~ /\Art:(\d+)\.\. yy/ or fail("rt: expansion failed: $q");
+ is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: beg expand');
+ $q = $s->query_argv_to_string($g, [qw(rt:..2010-10-02 zz)]);
+ $q =~ /\Art:\.\.(\d+) zz/ or fail("rt: expansion failed: $q");
+ is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: end expand');
+ $q = $s->query_argv_to_string($g, [qw(something dt:2010-10-02..)]);
+ like($q, qr/\Asomething dt:20101002\d{6}\.\./, 'dt: expansion');
+ $q = $s->query_argv_to_string($g, [qw(x dt:yesterday.. y)]);
+ my $exp = strftime('%Y%m%d', gmtime(time - 86400));
+ # ${exp} is braced so the following [0-9] is always a character
+ # class and never taken for an array subscript
+ like($q, qr/x dt:${exp}[0-9]{6}\.\. y/, '"yesterday" handled');
+ $q = $s->query_argv_to_string($g, [qw(x dt:20101002054123)]);
+ is($q, 'x dt:20101002054123..20101003054123', 'single dt: expanded');
+ $q = $s->query_argv_to_string($g, [qw(x dt:2010-10-02T05:41:23Z)]);
+ is($q, 'x dt:20101002054123..20101003054123', 'ISO8601 dt: expanded');
+ $q = $s->query_argv_to_string($g, [qw(rt:1970..1971)]);
+ $q =~ /\Art:(\d+)\.\.(\d+)\z/ or fail "YYYY rt: expansion: $q";
+ my ($beg, $end) = ($1, $2);
+ is(strftime('%Y', gmtime($beg)), 1970, 'rt: starts at 1970');
+ is(strftime('%Y', gmtime($end)), 1971, 'rt: ends at 1971');
+ $q = $s->query_argv_to_string($g, [qw(rt:1970-01-01)]);
+ $q =~ /\Art:(\d+)\.\.(\d+)\z/ or fail "YYYY-MM-DD rt: expansion: $q";
+ ($beg, $end) = ($1, $2);
+ is(strftime('%Y-%m-%d', gmtime($beg)), '1970-01-01',
+ 'rt: date-only w/o range');
+ is(strftime('%Y-%m-%d', gmtime($end)), '1970-01-02',
+ 'rt: date-only auto-end');
+ $q = $s->query_argv_to_string($g, [qw{OR (rt:1993-10-02)}]);
+ like($q, qr/\AOR \(rt:749\d{6}\.\.749\d{6}\)\z/,
+ 'trailing parentheses preserved');
+
+ # query_approxidate() rewrites its second argument in place
+ my $qs = qq[f:bob rt:1993-10-02..2010-10-02];
+ $s->query_approxidate($g, $qs);
+ like($qs, qr/\Af:bob rt:749\d{6}\.\.128\d{7}\z/,
+ 'no phrases, no problem');
+
+ my $orig = $qs = qq[f:bob "d:1993-10-02..2010-10-02"];
+ $s->query_approxidate($g, $qs);
+ is($qs, $orig, 'phrase preserved');
+
+ $orig = $qs = qq[f:bob "d:1993-10-02..2010-10-02 "] .
+ qq["dt:1993-10-02..2010-10-02 " \x{201c}];
+ $s->query_approxidate($g, $qs);
+ is($qs, $orig, 'phrase preserved even with escaped ""');
+
+ $qs = qq[f:bob "hello world" d:1993-10-02..2010-10-02];
+ $s->query_approxidate($g, $qs);
+ is($qs, qq[f:bob "hello world" d:19931002..20101002],
+ 'post-phrase date corrected');
+
+ # Xapian uses "" to escape " inside phrases, we don't explicitly
+ # handle that, but are able to pass the result through unchanged
+ for my $pair (["\x{201c}", "\x{201d}"], ['"', '"']) {
+ my ($x, $y) = @$pair;
+ # label the quote pair by its code points, in hex to match \x{}
+ my $tag = sprintf('\x{%x}-\x{%x}', ord($x), ord($y));
+ $orig = $qs = qq[${x}hello d:1993-10-02.."" world$y];
+ $s->query_approxidate($g, $qs);
+ is($qs, $orig, "phrases unchanged $tag");
+
+ $s->query_approxidate($g, my $tmp = "$qs d:..2010-10-02");
+ is($tmp, "$orig d:..20101002",
+ 'two phrases did not throw off date parsing');
+
+ $orig = $qs = qq[${x}hello d:1993-10-02..$y$x world$y];
+ $s->query_approxidate($g, $qs);
+ is($qs, $orig, "phrases unchanged $tag");
+
+ $s->query_approxidate($g, $tmp = "$qs d:..2010-10-02");
+ is($tmp, "$orig d:..20101002",
+ 'two phrases did not throw off date parsing');
+ }
+
+ my $x_days_ago = strftime('%Y%m%d', gmtime(time - (5 * 86400)));
+ $qs = qq[broken d:5.days.ago..];
+ $s->query_approxidate($g, $qs);
+ # braced for the same reason as ${exp} above
+ like($qs, qr/\Abroken dt:${x_days_ago}[0-9]{6}\.\./,
+ 'date.phrase.with.dots');
+
+ $qs = 'd:20101002..now';
+ $s->query_approxidate($g, $qs);
+ like($qs, qr/\Adt:20101002000000\.\.[0-9]{14}\z/,
+ 'approxidate on range-end only');
+
+ $ENV{TEST_EXPENSIVE} or
+ skip 'TEST_EXPENSIVE not set for argv overflow check', 1;
+ # warnings are collected only to keep them out of the test output
+ my @w;
+ local $SIG{__WARN__} = sub { push @w, @_ }; # for pure Perl version
+ # 4096 * 32 date ranges: the call is expected to die, not succeed
+ my @fail = map { 'd:1993-10-02..2010-10-02' } (1..(4096 * 32));
+ eval { $s->query_argv_to_string($g, \@fail) };
+ ok($@, 'exception raised');
+}
+
+# End of the test script. Presumably Test::More's done_testing(), which
+# derives the plan from the number of tests actually run -- confirm
+# against the imports at the top of the file.
+done_testing();