#!perl -w
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Config;
-use PublicInbox::Search;
use PublicInbox::InboxWritable;
use Fcntl qw(:seek);
-my $json = PublicInbox::Config::json() or plan skip_all => 'JSON missing';
require_git(2.6);
-require_mods(qw(DBD::SQLite Search::Xapian));
+require_mods(qw(json DBD::SQLite Search::Xapian));
+require PublicInbox::Search;
use_ok 'PublicInbox::ExtSearch';
use_ok 'PublicInbox::ExtSearchIdx';
use_ok 'PublicInbox::OverIdx';
my $sock = tcp_server();
-my $host_port = $sock->sockhost . ':' . $sock->sockport;
+my $host_port = tcp_host_port($sock);
my ($home, $for_destroy) = tmpdir();
local $ENV{HOME} = $home;
mkdir "$home/.public-inbox" or BAIL_OUT $!;
ok($es->has_threadid, '->has_threadid');
}
+if ('with boost') {
+ # The string condition is always true; it only labels this group of
+ # assertions. The group checks that publicinbox.<name>.boost decides
+ # which inbox's copy is recorded in $smsg->{blob}.
+
+ # Step 1: only v1test has a boost, so the v1 xref3 entry is expected
+ # to carry the blob stored in the over row.
+ xsys([qw(git config publicinbox.v1test.boost), 10],
+ { GIT_CONFIG => $cfg_path });
+ ok(run_script([qw(-extindex --all), "$home/extindex-b"]),
+ 'extindex init with boost');
+ my $es = PublicInbox::ExtSearch->new("$home/extindex-b");
+ my $smsg = $es->over->get_art(1);
+ ok($smsg, 'got first article');
+ my $xref3 = $es->over->get_xref3($smsg->{num});
+ # split the xref3 entries by their leading "v1"/"v2" prefix
+ my @v1 = grep(/\Av1/, @$xref3);
+ my @v2 = grep(/\Av2/, @$xref3);
+ like($v1[0], qr/\Av1\.example.*?\b\Q$smsg->{blob}\E\b/,
+ 'smsg->{blob} respected boost');
+ is(scalar(@$xref3), 2, 'only two entries');
+ undef $es;
+
+ # Step 2: give v2test a larger boost (20 vs 10) and --reindex; the
+ # blob is now expected to match the v2 entry.
+ xsys([qw(git config publicinbox.v2test.boost), 20],
+ { GIT_CONFIG => $cfg_path });
+ ok(run_script([qw(-extindex --all --reindex), "$home/extindex-b"]),
+ 'extindex --reindex with altered boost');
+
+ $es = PublicInbox::ExtSearch->new("$home/extindex-b");
+ $smsg = $es->over->get_art(1);
+ # NOTE(review): @v2 still holds the entries fetched before --reindex;
+ # this presumably relies on the v2 xref3 entry being unchanged -- confirm.
+ like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/,
+ 'smsg->{blob} respects boost after reindex');
+
+ # high boost added later
+ # Step 3: a second extindex is fed v1test first and v2test in a
+ # separate run; the v2 blob is still expected to win.
+ my $b2 = "$home/extindex-bb";
+ ok(run_script([qw(-extindex), $b2, "$home/v1test"]),
+ 'extindex with low boost inbox only');
+ ok(run_script([qw(-extindex), $b2, "$home/v2test"]),
+ 'extindex with high boost inbox only');
+ $es = PublicInbox::ExtSearch->new($b2);
+ $smsg = $es->over->get_art(1);
+ # NOTE(review): $xref3 is refreshed here but not read again; the
+ # assertion below compares against @v2 from the first index.
+ $xref3 = $es->over->get_xref3($smsg->{num});
+ like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/,
+ 'smsg->{blob} respected boost across 2 index runs');
+
+ # drop both boost settings so later tests see the config without them
+ xsys([qw(git config --unset publicinbox.v1test.boost)],
+ { GIT_CONFIG => $cfg_path });
+ xsys([qw(git config --unset publicinbox.v2test.boost)],
+ { GIT_CONFIG => $cfg_path });
+}
+
{ # TODO: -extindex should write this to config
open $fh, '>>', $cfg_path or BAIL_OUT $!;
print $fh <<EOF or BAIL_OUT $!;
if ('reindex catches missed messages') {
my $v2ibx = $cfg->lookup_name('v2test');
+ $v2ibx->{-no_fsync} = 1;
my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0);
my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0);
my $eml = eml_load('t/data/0001.patch');
if ('reindex catches content bifurcation') {
use PublicInbox::MID qw(mids);
my $v2ibx = $cfg->lookup_name('v2test');
+ $v2ibx->{-no_fsync} = 1;
my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0);
my $eml = eml_load('t/data/message_embed.eml');
my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0);
is(scalar(@it), 1, 'only one inbox left');
}
+if ('dedupe + dry-run') { # always-true label: --dedupe with and without --dry-run
+ my @cmd = ('-extindex', "$home/extindex");
+ my $opt = { 2 => \(my $err = '') }; # fd 2 (stderr) of each run is captured into $err
+ ok(run_script([@cmd, '--dedupe'], undef, $opt), '--dedupe');
+ ok(run_script([@cmd, qw(--dedupe --dry-run)], undef, $opt),
+ '--dry-run --dedupe');
+ is $err, '', 'no errors'; # checked before the run below, which is expected to fail
+ ok(!run_script([@cmd, qw(--dry-run)], undef, $opt),
+ '--dry-run alone fails');
+}
+
+for my $j (1, 3, 6) { # create one extindex per -j value and check the last shard directory
+ my $o = { 2 => \(my $err = '') }; # stderr is captured but not inspected in this loop
+ my $d = "$home/extindex-j$j";
+ ok(run_script(['-extindex', "-j$j", '--all', $d], undef, $o),
+ "init with -j$j");
+ my $max = $j - 2; # expected number of the last shard dir; clamped to 0 on the next line
+ $max = 0 if $max < 0;
+ my @dirs = glob("$d/ei*/?"); # single-character entries below each ei* directory
+ like($dirs[-1], qr!/ei[0-9]+/$max\z!, '-j works');
+}
+
+SKIP: { # reshard extindex-j1 with -xcpdb, then compact/dedupe/gc when xapian-compact exists
+ my $d = "$home/extindex-j1";
+ my $o = { 2 => \(my $err = '') }; # captures stderr of the -compact and --gc runs below
+ ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4');
+ my @dirs = glob("$d/ei*/?");
+ for my $i (0..3) { # after -R4: exactly one directory for each of 0..3
+ is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created");
+ }
+ for my $i (4..5) { # and none numbered 4 or 5
+ is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]");
+ }
+
+ ok(run_script([qw(-xcpdb -R2), $d]), 'xcpdb -R2');
+ @dirs = glob("$d/ei*/?");
+ for my $i (0..1) { # after -R2: only 0 and 1 remain
+ is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] kept");
+ }
+ for my $i (2..3) {
+ is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]");
+ }
+ skip 'xapian-compact missing', 4 unless have_xapian_compact; # 4 == assertions remaining in this block
+ ok(run_script([qw(-compact), $d], undef, $o), 'compact');
+ # n.b. stderr contains xapian-compact output
+
+ my @d2 = glob("$d/ei*/?"); # compaction must not add or remove shard directories
+ is_deeply(\@d2, \@dirs, 'dirs consistent after compact');
+ ok(run_script([qw(-extindex --dedupe --all), $d]),
+ '--dedupe works after compact');
+ ok(run_script([qw(-extindex --gc), $d], undef, $o),
+ '--gc works after compact');
+}
+
+{ # ensure --gc removes non-xposted messages
+ # Outline: add a temporary inbox (v2tmp) holding one message that
+ # exists nowhere else, index it, cut the inbox back out of the config
+ # file, then verify --gc drops the message from both over and search.
+
+ # remember the config size so the -init additions can be truncated away
+ my $old_size = -s $cfg_path // xbail "stat $cfg_path $!";
+ my $tmp_addr = 'v2tmp@example.com';
+ run_script([qw(-init v2tmp --indexlevel basic
+ --newsgroup v2tmp.example),
+ "$home/v2tmp", 'http://example.com/v2tmp', $tmp_addr ])
+ or xbail '-init';
+ $env = { ORIGINAL_RECIPIENT => $tmp_addr };
+ # anonymous temp file used as stdin for -mda
+ open $fh, '+>', undef or xbail "open $!";
+ $fh->autoflush(1);
+ my $mid = 'tmpmsg@example.com';
+ print $fh <<EOM or xbail "print $!";
+From: b\@z
+To: b\@r
+Message-Id: <$mid>
+Subject: tmpmsg
+Date: Tue, 19 Jan 2038 03:14:07 +0000
+
+EOM
+ seek $fh, 0, SEEK_SET or xbail "seek $!";
+ run_script([qw(-mda --no-precheck)], $env, {0 => $fh}) or xbail '-mda';
+ ok(run_script([qw(-extindex --all), "$home/extindex"]), 'update');
+ my $nr; # number of 'z:0..' search results while tmpmsg is still indexed
+ {
+ # inner scope so this ExtSearch handle is released before --gc runs
+ my $es = PublicInbox::ExtSearch->new("$home/extindex");
+ my ($id, $prv);
+ my $smsg = $es->over->next_by_mid($mid, \$id, \$prv);
+ ok($smsg, 'tmpmsg indexed');
+ my $mset = $es->search->mset("mid:$mid");
+ is($mset->size, 1, 'new message found');
+ $mset = $es->search->mset('z:0..');
+ $nr = $mset->size;
+ }
+ # cut the config back to its pre-"-init" size, removing v2tmp from it
+ truncate($cfg_path, $old_size) or xbail "truncate $!";
+ # start from '' (as the other stderr captures in this file do) so the
+ # split below never sees undef
+ my $rdr = { 2 => \(my $err = '') };
+ ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $rdr),
+ 'gc to get rid of removed inbox');
+ # stderr lines starting with "I:" or "#" are treated as informational
+ is_deeply([ grep(!/^(?:I:|#)/, split(/^/m, $err)) ], [],
+ 'no non-informational errors in stderr');
+
+ my $es = PublicInbox::ExtSearch->new("$home/extindex");
+ my $mset = $es->search->mset("mid:$mid");
+ is($mset->size, 0, 'tmpmsg gone from search');
+ my ($id, $prv);
+ is($es->over->next_by_mid($mid, \$id, \$prv), undef,
+ 'tmpmsg gone from over');
+ $id = $prv = undef; # reset the iteration state before the next lookup
+ is($es->over->next_by_mid('testmessage@example.com', \$id, \$prv),
+ undef, 'remaining message not in over');
+ # exactly one message (tmpmsg) should have disappeared from search
+ $mset = $es->search->mset('z:0..');
+ is($mset->size, $nr - 1, 'existing messages not clobbered from search');
+ my $o = $es->over->{dbh}->selectall_arrayref(<<EOM);
+SELECT num FROM over ORDER BY num
+EOM
+ is(scalar(@$o), $mset->size, 'over row count matches Xapian');
+ my $x = $es->over->{dbh}->selectall_arrayref(<<EOM);
+SELECT DISTINCT(docid) FROM xref3 ORDER BY docid
+EOM
+ is_deeply($x, $o, 'xref3 and over docids match');
+}
+
done_testing;