X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=t%2Fextsearch.t;h=2d7375d63f9a6467f3b98b80da2715589ad4b573;hb=refs%2Fheads%2Fmaster;hp=26c3d4ae72ece18d18297478b77d6ed3e0e5443e;hpb=a1aa7a3bf55ca5aa6e12294bc1f864dba8e63269;p=public-inbox.git diff --git a/t/extsearch.t b/t/extsearch.t index 26c3d4ae..03bcad95 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use Test::More; @@ -13,8 +13,6 @@ require PublicInbox::Search; use_ok 'PublicInbox::ExtSearch'; use_ok 'PublicInbox::ExtSearchIdx'; use_ok 'PublicInbox::OverIdx'; -my $sock = tcp_server(); -my $host_port = $sock->sockhost . ':' . $sock->sockport; my ($home, $for_destroy) = tmpdir(); local $ENV{HOME} = $home; mkdir "$home/.public-inbox" or BAIL_OUT $!; @@ -54,12 +52,57 @@ run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; run_script([qw(-index -Lbasic), "$home/v1test"]) or BAIL_OUT "index $?"; -ok(run_script([qw(-extindex --all), "$home/extindex"]), 'extindex init'); +ok(run_script([qw(-extindex --dangerous --all), "$home/extindex"]), + 'extindex init'); { my $es = PublicInbox::ExtSearch->new("$home/extindex"); ok($es->has_threadid, '->has_threadid'); } +if ('with boost') { + xsys([qw(git config publicinbox.v1test.boost), 10], + { GIT_CONFIG => $cfg_path }); + ok(run_script([qw(-extindex --all), "$home/extindex-b"]), + 'extindex init with boost'); + my $es = PublicInbox::ExtSearch->new("$home/extindex-b"); + my $smsg = $es->over->get_art(1); + ok($smsg, 'got first article'); + my $xref3 = $es->over->get_xref3($smsg->{num}); + my @v1 = grep(/\Av1/, @$xref3); + my @v2 = grep(/\Av2/, @$xref3); + like($v1[0], qr/\Av1\.example.*?\b\Q$smsg->{blob}\E\b/, + 'smsg->{blob} respected boost'); + is(scalar(@$xref3), 2, 'only to entries'); + undef $es; + + xsys([qw(git config publicinbox.v2test.boost), 20], + { GIT_CONFIG => $cfg_path }); + ok(run_script([qw(-extindex --all --reindex), "$home/extindex-b"]), + 'extindex --reindex with altered boost'); + + $es = PublicInbox::ExtSearch->new("$home/extindex-b"); + $smsg = $es->over->get_art(1); + like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/, + 'smsg->{blob} respects boost after reindex'); + + # high boost added later + my $b2 = "$home/extindex-bb"; + ok(run_script([qw(-extindex), $b2, "$home/v1test"]), + 'extindex with low boost inbox only'); + ok(run_script([qw(-extindex), $b2, "$home/v2test"]), + 'extindex with high boost inbox only'); + $es = PublicInbox::ExtSearch->new($b2); + $smsg = $es->over->get_art(1); + $xref3 = $es->over->get_xref3($smsg->{num}); + like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/, + 'smsg->{blob} respected boost across 2 index runs'); + + xsys([qw(git config --unset publicinbox.v1test.boost)], + { GIT_CONFIG => $cfg_path }); + xsys([qw(git config --unset publicinbox.v2test.boost)], + { GIT_CONFIG => $cfg_path }); +} + { # TODO: -extindex should write this to config open $fh, '>>', $cfg_path or BAIL_OUT $!; print $fh < $sock }); @@ -132,7 +177,7 @@ if ('inbox edited') { is($mset->size, 1, 'new message found'); $mset = $es->mset('b:"test message"'); is($mset->size, 1, 'old message found'); - delete @$es{qw(git over xdb)}; # fork preparation + delete @$es{qw(git over xdb qp)}; # fork preparation my $pi_cfg = PublicInbox::Config->new; $pi_cfg->fill_all; @@ -236,6 +281,7 @@ if ('inject w/o indexing') { if ('reindex catches missed messages') { my $v2ibx = $cfg->lookup_name('v2test'); + $v2ibx->{-no_fsync} = 1; my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0); my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0); my $eml = eml_load('t/data/0001.patch'); @@ -263,12 +309,17 @@ if ('reindex catches missed messages') { is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 stays unchanged'); my @err = split(/^/, $err); is(scalar(@err), 1, 'only one warning') or diag "err=$err"; - like($err[0], qr/I: reindex_unseen/, 'got reindex_unseen message'); + like($err[0], qr/# reindex_unseen/, 'got reindex_unseen message'); my $new = $oidx->get_art($max + 1); is($new->{subject}, $eml->header('Subject'), 'new message added'); $es->{xdb}->reopen; - my $mset = $es->mset("mid:$new->{mid}"); + # git patch-id --stable search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); + + $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 1, 'previously unseen, now indexed in Xapian'); ok($im->remove($eml), 'remove new message from v2 inbox'); @@ -291,11 +342,17 @@ if ('reindex catches missed messages') { $es->{xdb}->reopen; $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 0, 'stale mid gone Xapian'); + + ok(run_script([qw(-extindex --reindex --all --fast), "$home/extindex"], + undef, $opt), '--reindex w/ --fast'); + ok(!run_script([qw(-extindex --all --fast), "$home/extindex"], + undef, $opt), '--fast alone makes no sense'); } if ('reindex catches content bifurcation') { use PublicInbox::MID qw(mids); my $v2ibx = $cfg->lookup_name('v2test'); + $v2ibx->{-no_fsync} = 1; my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0); my $eml = eml_load('t/data/message_embed.eml'); my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0); @@ -324,7 +381,7 @@ if ('reindex catches content bifurcation') { is($oidx->max, $oldmax, 'oidx->max unchanged'); $oidx->dbh_close; ok(run_script([qw(-extindex --reindex --all), "$home/extindex"], - undef, $opt), 'extindex --reindex'); + undef, $opt), 'extindex --reindex') or diag explain($opt); $oidx->dbh; ok($oidx->max > $oldmax, 'oidx->max bumped'); like($err, qr/split into 2 due to deduplication change/, @@ -358,12 +415,143 @@ if ('remove v1test and test gc') { my $opt = { 2 => \(my $err = '') }; ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $opt), 'extindex --gc'); - like($err, qr/^I: remove #1 v1\.example /ms, 'removed v1 message'); - is(scalar(grep(!/^I:/, split(/^/m, $err))), 0, + like($err, qr/^# remove #1 v1\.example /ms, 'removed v1 message'); + is(scalar(grep(!/^#/, split(/^/m, $err))), 0, 'no non-informational messages'); $misc->{xdb}->reopen; @it = $misc->mset('')->items; is(scalar(@it), 1, 'only one inbox left'); } +if ('dedupe + dry-run') { + my @cmd = ('-extindex', "$home/extindex"); + my $opt = { 2 => \(my $err = '') }; + ok(run_script([@cmd, '--dedupe'], undef, $opt), '--dedupe'); + ok(run_script([@cmd, qw(--dedupe --dry-run)], undef, $opt), + '--dry-run --dedupe'); + is $err, '', 'no errors'; + ok(!run_script([@cmd, qw(--dry-run)], undef, $opt), + '--dry-run alone fails'); +} + +# chmod 0755, $home or xbail "chmod: $!"; +for my $j (1, 3, 6) { + my $o = { 2 => \(my $err = '') }; + my $d = "$home/extindex-j$j"; + ok(run_script(['-extindex', "-j$j", '--all', $d], undef, $o), + "init with -j$j"); + my $max = $j - 2; + $max = 0 if $max < 0; + my @dirs = glob("$d/ei*/?"); + like($dirs[-1], qr!/ei[0-9]+/$max\z!, '-j works'); +} + +SKIP: { + my $d = "$home/extindex-j1"; + my $es = PublicInbox::ExtSearch->new($d); + ok(my $nresult0 = $es->mset('z:0..')->size, 'got results'); + ok(ref($es->{xdb}), '{xdb} created'); + my $nshards1 = $es->{nshard}; + is($nshards1, 1, 'correct shard count'); + + my @ei_dir = glob("$d/ei*/"); + chmod 0755, $ei_dir[0] or xbail "chmod: $!"; + my $mode = sprintf('%04o', 07777 & (stat($ei_dir[0]))[2]); + is($mode, '0755', 'mode set on ei*/ dir'); + my $o = { 2 => \(my $err = '') }; + ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4'); + my @dirs = glob("$d/ei*/?"); + for my $i (0..3) { + is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created"); + my $m = sprintf('%04o', 07777 & (stat($dirs[$i]))[2]); + is($m, $mode, "shard [$i] mode"); + } + delete @$es{qw(xdb qp)}; + is($es->mset('z:0..')->size, $nresult0, 'new shards, same results'); + + for my $i (4..5) { + is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]"); + } + + ok(run_script([qw(-xcpdb -R2), $d]), 'xcpdb -R2'); + @dirs = glob("$d/ei*/?"); + for my $i (0..1) { + is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] kept"); + } + for my $i (2..3) { + is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]"); + } + skip 'xapian-compact missing', 4 unless have_xapian_compact; + ok(run_script([qw(-compact), $d], undef, $o), 'compact'); + # n.b. stderr contains xapian-compact output + + my @d2 = glob("$d/ei*/?"); + is_deeply(\@d2, \@dirs, 'dirs consistent after compact'); + ok(run_script([qw(-extindex --dedupe --all), $d]), + '--dedupe works after compact'); + ok(run_script([qw(-extindex --gc), $d], undef, $o), + '--gc works after compact'); +} + +{ # ensure --gc removes non-xposted messages + my $old_size = -s $cfg_path // xbail "stat $cfg_path $!"; + my $tmp_addr = 'v2tmp@example.com'; + run_script([qw(-init v2tmp --indexlevel basic + --newsgroup v2tmp.example), + "$home/v2tmp", 'http://example.com/v2tmp', $tmp_addr ]) + or xbail '-init'; + $env = { ORIGINAL_RECIPIENT => $tmp_addr }; + open $fh, '+>', undef or xbail "open $!"; + $fh->autoflush(1); + my $mid = 'tmpmsg@example.com'; + print $fh < +Subject: tmpmsg +Date: Tue, 19 Jan 2038 03:14:07 +0000 + +EOM + seek $fh, 0, SEEK_SET or xbail "seek $!"; + run_script([qw(-mda --no-precheck)], $env, {0 => $fh}) or xbail '-mda'; + ok(run_script([qw(-extindex --all), "$home/extindex"]), 'update'); + my $nr; + { + my $es = PublicInbox::ExtSearch->new("$home/extindex"); + my ($id, $prv); + my $smsg = $es->over->next_by_mid($mid, \$id, \$prv); + ok($smsg, 'tmpmsg indexed'); + my $mset = $es->search->mset("mid:$mid"); + is($mset->size, 1, 'new message found'); + $mset = $es->search->mset('z:0..'); + $nr = $mset->size; + } + truncate($cfg_path, $old_size) or xbail "truncate $!"; + my $rdr = { 2 => \(my $err) }; + ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $rdr), + 'gc to get rid of removed inbox'); + is_deeply([ grep(!/^(?:I:|#)/, split(/^/m, $err)) ], [], + 'no non-informational errors in stderr'); + + my $es = PublicInbox::ExtSearch->new("$home/extindex"); + my $mset = $es->search->mset("mid:$mid"); + is($mset->size, 0, 'tmpmsg gone from search'); + my ($id, $prv); + is($es->over->next_by_mid($mid, \$id, \$prv), undef, + 'tmpmsg gone from over'); + $id = $prv = undef; + is($es->over->next_by_mid('testmessage@example.com', \$id, \$prv), + undef, 'remaining message not indavderover'); + $mset = $es->search->mset('z:0..'); + is($mset->size, $nr - 1, 'existing messages not clobbered from search'); + my $o = $es->over->{dbh}->selectall_arrayref(<size, 'over row count matches Xapian'); + my $x = $es->over->{dbh}->selectall_arrayref(<