X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=t%2Fextsearch.t;h=2d7375d63f9a6467f3b98b80da2715589ad4b573;hb=refs%2Fheads%2Fmaster;hp=1f62e80c8c1a2e7adcbcbbc3ff71d7506f5dee2d;hpb=7a85c3cd6c7c0257a04343ca345cf0f23d71729c;p=public-inbox.git diff --git a/t/extsearch.t b/t/extsearch.t index 1f62e80c..03bcad95 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use Test::More; @@ -13,8 +13,6 @@ require PublicInbox::Search; use_ok 'PublicInbox::ExtSearch'; use_ok 'PublicInbox::ExtSearchIdx'; use_ok 'PublicInbox::OverIdx'; -my $sock = tcp_server(); -my $host_port = tcp_host_port($sock); my ($home, $for_destroy) = tmpdir(); local $ENV{HOME} = $home; mkdir "$home/.public-inbox" or BAIL_OUT $!; @@ -54,7 +52,8 @@ run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; run_script([qw(-index -Lbasic), "$home/v1test"]) or BAIL_OUT "index $?"; -ok(run_script([qw(-extindex --all), "$home/extindex"]), 'extindex init'); +ok(run_script([qw(-extindex --dangerous --all), "$home/extindex"]), + 'extindex init'); { my $es = PublicInbox::ExtSearch->new("$home/extindex"); ok($es->has_threadid, '->has_threadid'); @@ -86,6 +85,18 @@ if ('with boost') { like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/, 'smsg->{blob} respects boost after reindex'); + # high boost added later + my $b2 = "$home/extindex-bb"; + ok(run_script([qw(-extindex), $b2, "$home/v1test"]), + 'extindex with low boost inbox only'); + ok(run_script([qw(-extindex), $b2, "$home/v2test"]), + 'extindex with high boost inbox only'); + $es = PublicInbox::ExtSearch->new($b2); + $smsg = $es->over->get_art(1); + $xref3 = $es->over->get_xref3($smsg->{num}); + like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/, + 'smsg->{blob} respected boost across 2 index runs'); + xsys([qw(git config --unset publicinbox.v1test.boost)], { GIT_CONFIG => $cfg_path }); xsys([qw(git config --unset publicinbox.v2test.boost)], @@ -112,6 +123,8 @@ EOF SKIP: { require_mods(qw(Net::NNTP), 1); + my $sock = tcp_server(); + my $host_port = tcp_host_port($sock); my ($out, $err) = ("$home/nntpd.out.log", "$home/nntpd.err.log"); my $cmd = [ '-nntpd', '-W0', "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, undef, { 3 => $sock }); @@ -164,7 +177,7 @@ if ('inbox edited') { is($mset->size, 1, 'new message found'); $mset = $es->mset('b:"test message"'); is($mset->size, 1, 'old message found'); - delete @$es{qw(git over xdb)}; # fork preparation + delete @$es{qw(git over xdb qp)}; # fork preparation my $pi_cfg = PublicInbox::Config->new; $pi_cfg->fill_all; @@ -296,12 +309,17 @@ if ('reindex catches missed messages') { is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 stays unchanged'); my @err = split(/^/, $err); is(scalar(@err), 1, 'only one warning') or diag "err=$err"; - like($err[0], qr/I: reindex_unseen/, 'got reindex_unseen message'); + like($err[0], qr/# reindex_unseen/, 'got reindex_unseen message'); my $new = $oidx->get_art($max + 1); is($new->{subject}, $eml->header('Subject'), 'new message added'); $es->{xdb}->reopen; - my $mset = $es->mset("mid:$new->{mid}"); + # git patch-id --stable search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); + + $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 1, 'previously unseen, now indexed in Xapian'); ok($im->remove($eml), 'remove new message from v2 inbox'); @@ -324,6 +342,11 @@ if ('reindex catches missed messages') { $es->{xdb}->reopen; $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 0, 'stale mid gone Xapian'); + + ok(run_script([qw(-extindex --reindex --all --fast), "$home/extindex"], + undef, $opt), '--reindex w/ --fast'); + ok(!run_script([qw(-extindex --all --fast), "$home/extindex"], + undef, $opt), '--fast alone makes no sense'); } if ('reindex catches content bifurcation') { @@ -358,7 +381,7 @@ if ('reindex catches content bifurcation') { is($oidx->max, $oldmax, 'oidx->max unchanged'); $oidx->dbh_close; ok(run_script([qw(-extindex --reindex --all), "$home/extindex"], - undef, $opt), 'extindex --reindex'); + undef, $opt), 'extindex --reindex') or diag explain($opt); $oidx->dbh; ok($oidx->max > $oldmax, 'oidx->max bumped'); like($err, qr/split into 2 due to deduplication change/, @@ -392,8 +415,8 @@ if ('remove v1test and test gc') { my $opt = { 2 => \(my $err = '') }; ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $opt), 'extindex --gc'); - like($err, qr/^I: remove #1 v1\.example /ms, 'removed v1 message'); - is(scalar(grep(!/^I:/, split(/^/m, $err))), 0, + like($err, qr/^# remove #1 v1\.example /ms, 'removed v1 message'); + is(scalar(grep(!/^#/, split(/^/m, $err))), 0, 'no non-informational messages'); $misc->{xdb}->reopen; @it = $misc->mset('')->items; @@ -411,6 +434,7 @@ if ('dedupe + dry-run') { '--dry-run alone fails'); } +# chmod 0755, $home or xbail "chmod: $!"; for my $j (1, 3, 6) { my $o = { 2 => \(my $err = '') }; my $d = "$home/extindex-j$j"; @@ -422,4 +446,112 @@ for my $j (1, 3, 6) { like($dirs[-1], qr!/ei[0-9]+/$max\z!, '-j works'); } +SKIP: { + my $d = "$home/extindex-j1"; + my $es = PublicInbox::ExtSearch->new($d); + ok(my $nresult0 = $es->mset('z:0..')->size, 'got results'); + ok(ref($es->{xdb}), '{xdb} created'); + my $nshards1 = $es->{nshard}; + is($nshards1, 1, 'correct shard count'); + + my @ei_dir = glob("$d/ei*/"); + chmod 0755, $ei_dir[0] or xbail "chmod: $!"; + my $mode = sprintf('%04o', 07777 & (stat($ei_dir[0]))[2]); + is($mode, '0755', 'mode set on ei*/ dir'); + my $o = { 2 => \(my $err = '') }; + ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4'); + my @dirs = glob("$d/ei*/?"); + for my $i (0..3) { + is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created"); + my $m = sprintf('%04o', 07777 & (stat($dirs[$i]))[2]); + is($m, $mode, "shard [$i] mode"); + } + delete @$es{qw(xdb qp)}; + is($es->mset('z:0..')->size, $nresult0, 'new shards, same results'); + + for my $i (4..5) { + is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]"); + } + + ok(run_script([qw(-xcpdb -R2), $d]), 'xcpdb -R2'); + @dirs = glob("$d/ei*/?"); + for my $i (0..1) { + is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] kept"); + } + for my $i (2..3) { + is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]"); + } + skip 'xapian-compact missing', 4 unless have_xapian_compact; + ok(run_script([qw(-compact), $d], undef, $o), 'compact'); + # n.b. stderr contains xapian-compact output + + my @d2 = glob("$d/ei*/?"); + is_deeply(\@d2, \@dirs, 'dirs consistent after compact'); + ok(run_script([qw(-extindex --dedupe --all), $d]), + '--dedupe works after compact'); + ok(run_script([qw(-extindex --gc), $d], undef, $o), + '--gc works after compact'); +} + +{ # ensure --gc removes non-xposted messages + my $old_size = -s $cfg_path // xbail "stat $cfg_path $!"; + my $tmp_addr = 'v2tmp@example.com'; + run_script([qw(-init v2tmp --indexlevel basic + --newsgroup v2tmp.example), + "$home/v2tmp", 'http://example.com/v2tmp', $tmp_addr ]) + or xbail '-init'; + $env = { ORIGINAL_RECIPIENT => $tmp_addr }; + open $fh, '+>', undef or xbail "open $!"; + $fh->autoflush(1); + my $mid = 'tmpmsg@example.com'; + print $fh < +Subject: tmpmsg +Date: Tue, 19 Jan 2038 03:14:07 +0000 + +EOM + seek $fh, 0, SEEK_SET or xbail "seek $!"; + run_script([qw(-mda --no-precheck)], $env, {0 => $fh}) or xbail '-mda'; + ok(run_script([qw(-extindex --all), "$home/extindex"]), 'update'); + my $nr; + { + my $es = PublicInbox::ExtSearch->new("$home/extindex"); + my ($id, $prv); + my $smsg = $es->over->next_by_mid($mid, \$id, \$prv); + ok($smsg, 'tmpmsg indexed'); + my $mset = $es->search->mset("mid:$mid"); + is($mset->size, 1, 'new message found'); + $mset = $es->search->mset('z:0..'); + $nr = $mset->size; + } + truncate($cfg_path, $old_size) or xbail "truncate $!"; + my $rdr = { 2 => \(my $err) }; + ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $rdr), + 'gc to get rid of removed inbox'); + is_deeply([ grep(!/^(?:I:|#)/, split(/^/m, $err)) ], [], + 'no non-informational errors in stderr'); + + my $es = PublicInbox::ExtSearch->new("$home/extindex"); + my $mset = $es->search->mset("mid:$mid"); + is($mset->size, 0, 'tmpmsg gone from search'); + my ($id, $prv); + is($es->over->next_by_mid($mid, \$id, \$prv), undef, + 'tmpmsg gone from over'); + $id = $prv = undef; + is($es->over->next_by_mid('testmessage@example.com', \$id, \$prv), + undef, 'remaining message not indavderover'); + $mset = $es->search->mset('z:0..'); + is($mset->size, $nr - 1, 'existing messages not clobbered from search'); + my $o = $es->over->{dbh}->selectall_arrayref(<size, 'over row count matches Xapian'); + my $x = $es->over->{dbh}->selectall_arrayref(<