1 # Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
7 use PublicInbox::ContentHash qw(content_digest);
8 use File::Path qw(remove_tree);
9 use PublicInbox::TestCommon;
# Prerequisites and shared fixtures used by every section of this test.
# NOTE(review): require_mods comes from PublicInbox::TestCommon; it
# presumably skips the file when a listed module cannot be loaded -- confirm.
11 require_mods(qw(DBD::SQLite Search::Xapian));
12 use_ok 'PublicInbox::V2Writable';
13 use_ok 'PublicInbox::OverIdx';
# Scratch directory that holds the inbox under test.
14 my ($inboxdir, $for_destroy) = tmpdir();
# Inbox settings.  Presumably these are the fields of $ibx_config, which the
# later sections copy into their own %config (declaration not visible here).
16 inboxdir => $inboxdir,
17 name => 'test-v2writable',
19 -primary_address => 'test@example.com',
# COPYING is opened relative to the current working directory.
24 open my $fh, '<', 'COPYING' or die "can't open COPYING: $!";
# Quoted phrase used later for the phrase-search checks.
28 my $phrase = q("defending all users' freedom");
# Template message: the headers below are followed by $agpl (presumably the
# text read from COPYING; its assignment is not visible in this view).
29 my $mime = PublicInbox::Eml->new(<<'EOF'.$agpl);
32 Subject: this is a subject
33 Date: Fri, 02 Oct 1993 00:00:00 +0000
# Initial import.  The template message is added ten times with Message-Ids
# <1@example.com> .. <10@example.com>.  At four points the importer's mark
# for its current tip is saved in $mark1..$mark4; the incremental sections
# later assign these to $ibx->{ref_head} before calling index_sync().
38 my ($mark1, $mark2, $mark3, $mark4);
40 my %config = %$ibx_config;
41 my $ibx = PublicInbox::Inbox->new(\%config);
42 my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1});
43 my $im0 = $im->importer(0);
44 foreach my $i (1..10) {
45 $mime->header_set('Message-Id', "<$i\@example.com>");
46 ok($im->add($mime), "message $i added");
48 $mark1 = $im0->get_mark($im0->{tip});
50 $mark2 = $im0->get_mark($im0->{tip});
# The condition is a non-empty string literal, so it is always true.
54 if ('test remove later') {
55 $mark3 = $im0->get_mark($im0->{tip});
56 $mime->header_set('Message-Id', "<5\@example.com>");
58 $mark4 = $im0->get_mark($im0->{tip});
# Baseline article-number range and number => Message-Id map.  Later
# sections compare against $minmax and $msgmap after each reindex.
62 $minmax = [ $ibx->mm->minmax ];
63 ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined');
64 is_deeply($minmax, [ 1, 10 ], 'minmax as expected');
65 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
67 my ($min, $max) = @$minmax;
68 $msgmap = $ibx->mm->msg_range(\$min, $max);
# Articles 4 and 5 are expected to be absent from the map.
# NOTE(review): the removals themselves are on lines not visible here.
70 [1, '1@example.com' ],
71 [2, '2@example.com' ],
72 [3, '3@example.com' ],
73 [6, '6@example.com' ],
74 [7, '7@example.com' ],
75 [8, '8@example.com' ],
76 [9, '9@example.com' ],
77 [10, '10@example.com' ],
78 ], 'msgmap as expected');
# Reindex with the existing index files still in place: index_sync must not
# die, and the article range, high-water mark and msgmap must be unchanged.
82 my %config = %$ibx_config;
83 my $ibx = PublicInbox::Inbox->new(\%config);
84 my $im = PublicInbox::V2Writable->new($ibx, 1);
# eval traps any exception so the failure is reported through is() below.
85 eval { $im->index_sync({reindex => 1}) };
86 is($@, '', 'no error from reindexing');
90 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
91 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
93 my ($min, $max) = $ibx->mm->minmax;
94 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Reindex after the Xapian directory has been removed.
# $xap is "<inboxdir>/xap" followed by the search schema version.
97 my $xap = "$inboxdir/xap".PublicInbox::Search::SCHEMA_VERSION();
# NOTE(review): the statement that deletes $xap is not visible in this view.
99 ok(!-d $xap, 'Xapian directories removed');
101 my %config = %$ibx_config;
102 my $ibx = PublicInbox::Inbox->new(\%config);
103 my $im = PublicInbox::V2Writable->new($ibx, 1);
104 eval { $im->index_sync({reindex => 1}) };
105 is($@, '', 'no error from reindexing');
# The reindex is expected to bring the directory back.
107 ok(-d $xap, 'Xapian directories recreated');
110 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
111 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
113 my ($min, $max) = $ibx->mm->minmax;
114 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Reindex after both msgmap.sqlite3 and the Xapian directory are gone: the
# run must not die or warn, and must reproduce the baseline numbering.
# unlink gets explicit parentheses so the description is passed to ok() as
# the test name instead of being taken as a second filename to delete.
117 ok(unlink("$inboxdir/msgmap.sqlite3"), 'remove msgmap');
# NOTE(review): the statement that deletes $xap is not visible in this view.
119 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning raised while this handler is in effect.
122 local $SIG{__WARN__} = sub { push @warn, @_ };
123 my %config = %$ibx_config;
124 my $ibx = PublicInbox::Inbox->new(\%config);
125 my $im = PublicInbox::V2Writable->new($ibx, 1);
126 eval { $im->index_sync({reindex => 1}) };
127 is($@, '', 'no error from reindexing without msgmap');
128 is(scalar(@warn), 0, 'no warnings from reindexing');
130 ok(-d $xap, 'Xapian directories recreated');
132 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
133 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
135 my ($min, $max) = $ibx->mm->minmax;
136 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Same scenario as a plain reindex without msgmap, plus a phrase search and
# a measurement of the on-disk index size for the size comparisons made by
# the medium and basic sections.
# unlink gets explicit parentheses so the description is passed to ok() as
# the test name instead of being taken as a second filename to delete.
140 ok(unlink("$inboxdir/msgmap.sqlite3"), 'remove msgmap');
# NOTE(review): the statement that deletes $xap is not visible in this view.
142 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning raised while this handler is in effect.
145 local $SIG{__WARN__} = sub { push @warn, @_ };
146 my %config = %$ibx_config;
147 my $ibx = PublicInbox::Inbox->new(\%config);
148 my $im = PublicInbox::V2Writable->new($ibx, 1);
149 eval { $im->index_sync({reindex => 1}) };
150 is($@, '', 'no error from reindexing without msgmap');
151 is_deeply(\@warn, [], 'no warnings');
153 ok(-d $xap, 'Xapian directories recreated');
155 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
156 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# No indexlevel is set in %config here; the test name below implies the
# effective level is 'full' (presumably the default -- confirm).
157 my $mset = $ibx->search->mset($phrase);
158 isnt($mset->size, 0, "phrase search succeeds on indexlevel=full");
# Add up the sizes of regular files matching $xap/*/*, keyed by index level.
# "-s _" reuses the stat result of the preceding "-f $_".
159 for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
161 my ($min, $max) = $ibx->mm->minmax;
162 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Rebuild from scratch with indexlevel=medium: ordinary searches must work
# and the index must be smaller on disk than the one measured for 'full'.
# unlink gets explicit parentheses so the description is passed to ok() as
# the test name instead of being taken as a second filename to delete.
165 ok(unlink("$inboxdir/msgmap.sqlite3"), 'remove msgmap');
# NOTE(review): the statement that deletes $xap is not visible in this view.
167 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning raised while this handler is in effect.
170 local $SIG{__WARN__} = sub { push @warn, @_ };
171 my %config = %$ibx_config;
172 $config{indexlevel} = 'medium';
173 my $ibx = PublicInbox::Inbox->new(\%config);
174 my $im = PublicInbox::V2Writable->new($ibx);
175 eval { $im->index_sync({reindex => 1}) };
176 is($@, '', 'no error from reindexing without msgmap');
177 is_deeply(\@warn, [], 'no warnings');
179 ok(-d $xap, 'Xapian directories recreated');
181 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
182 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# NOTE(review): the comment below says phrase searches still work, while the
# assertion expects zero hits.  The line that opens this group is not visible
# here, so the group may be deliberately disabled -- confirm.
185 # not sure why, but Xapian seems to fall back to terms and
186 # phrase searches still work
# Drop the cached search object so the next ->search call builds a new one.
187 delete $ibx->{search};
188 my $mset = $ibx->search->mset($phrase);
189 is($mset->size, 0, 'phrase search does not work on medium');
# $words is defined on a line not visible in this view.
193 my $mset = $ibx->search->mset($words);
194 isnt($mset->size, 0, "normal search works on indexlevel=medium");
# Add up the sizes of regular files matching $xap/*/*, keyed by index level.
# "-s _" reuses the stat result of the preceding "-f $_".
195 for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
197 ok($sizes{full} > $sizes{medium}, 'medium is smaller than full');
200 my ($min, $max) = $ibx->mm->minmax;
201 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Rebuild from scratch with indexlevel=basic: numbering must match the
# baseline and the index must be smaller on disk than the 'medium' one.
# unlink gets explicit parentheses so the description is passed to ok() as
# the test name instead of being taken as a second filename to delete.
204 ok(unlink("$inboxdir/msgmap.sqlite3"), 'remove msgmap');
# NOTE(review): the statement that deletes $xap is not visible in this view.
206 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning raised while this handler is in effect.
209 local $SIG{__WARN__} = sub { push @warn, @_ };
210 my %config = %$ibx_config;
211 $config{indexlevel} = 'basic';
212 my $ibx = PublicInbox::Inbox->new(\%config);
213 my $im = PublicInbox::V2Writable->new($ibx);
214 eval { $im->index_sync({reindex => 1}) };
215 is($@, '', 'no error from reindexing without msgmap');
216 is_deeply(\@warn, [], 'no warnings');
218 ok(-d $xap, 'Xapian directories recreated');
220 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
221 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# NOTE(review): isnt() is called with two arguments, so the string is the
# value compared against and the test has no name.  Left unchanged because
# the intended assertion is unclear -- confirm what was meant.
223 isnt($ibx->search, 'no search for basic');
# Add up the sizes of regular files matching $xap/*/*, keyed by index level.
# "-s _" reuses the stat result of the preceding "-f $_".
225 for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
226 ok($sizes{medium} > $sizes{basic}, 'basic is smaller than medium');
228 my ($min, $max) = $ibx->mm->minmax;
229 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
233 # An incremental indexing test
# First step of the sequence: start with no msgmap and no Xapian directory,
# point ref_head at $mark1 and run a plain index_sync() (no reindex flag).
# unlink gets explicit parentheses so the description is passed to ok() as
# the test name instead of being taken as a second filename to delete.
234 ok(unlink("$inboxdir/msgmap.sqlite3"), 'remove msgmap');
# NOTE(review): the statement that deletes $xap is not visible in this view.
236 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning raised while this handler is in effect.
239 local $SIG{__WARN__} = sub { push @warn, @_ };
240 my %config = %$ibx_config;
241 my $ibx = PublicInbox::Inbox->new(\%config);
242 # mark1 4 simple additions in the same index_sync
243 $ibx->{ref_head} = $mark1;
244 my $im = PublicInbox::V2Writable->new($ibx);
245 eval { $im->index_sync() };
246 is($@, '', 'no error from reindexing without msgmap');
247 is_deeply(\@warn, [], 'no warnings');
# Only articles 1..4 are expected to be known at this point.
249 my ($min, $max) = $ibx->mm->minmax;
250 is($min, 1, 'min as expected');
251 is($max, 4, 'max as expected');
252 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
253 is_deeply($ibx->mm->msg_range(\$min, $max),
255 [1, '1@example.com' ],
256 [2, '2@example.com' ],
257 [3, '3@example.com' ],
258 [4, '4@example.com' ],
259 ], 'msgmap as expected' );
# Incremental step: move ref_head to $mark2 and run a plain index_sync().
# Collect every warning raised while this handler is in effect.
263 local $SIG{__WARN__} = sub { push @warn, @_ };
264 my %config = %$ibx_config;
265 my $ibx = PublicInbox::Inbox->new(\%config);
266 # mark2 A delete separated from an add in the same index_sync
267 $ibx->{ref_head} = $mark2;
268 my $im = PublicInbox::V2Writable->new($ibx);
269 eval { $im->index_sync() };
270 is($@, '', 'no error from reindexing without msgmap');
271 is_deeply(\@warn, [], 'no warnings');
# Article 4 is expected to be gone (max is 3) while the high-water mark
# stays at 4.
273 my ($min, $max) = $ibx->mm->minmax;
274 is($min, 1, 'min as expected');
275 is($max, 3, 'max as expected');
276 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
277 is_deeply($ibx->mm->msg_range(\$min, $max),
279 [1, '1@example.com' ],
280 [2, '2@example.com' ],
281 [3, '3@example.com' ],
282 ], 'msgmap as expected' );
# Incremental step: move ref_head to $mark3 and run a plain index_sync().
# Collect every warning raised while this handler is in effect.
286 local $SIG{__WARN__} = sub { push @warn, @_ };
287 my %config = %$ibx_config;
288 my $ibx = PublicInbox::Inbox->new(\%config);
289 # mark3 adds following the delete at mark2
290 $ibx->{ref_head} = $mark3;
291 my $im = PublicInbox::V2Writable->new($ibx);
292 eval { $im->index_sync() };
293 is($@, '', 'no error from reindexing without msgmap');
294 is_deeply(\@warn, [], 'no warnings');
# Articles 5..10 are expected to have appeared; number 4 is still absent.
296 my ($min, $max) = $ibx->mm->minmax;
297 is($min, 1, 'min as expected');
298 is($max, 10, 'max as expected');
299 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
300 is_deeply($ibx->mm->msg_range(\$min, $max),
302 [1, '1@example.com' ],
303 [2, '2@example.com' ],
304 [3, '3@example.com' ],
305 [5, '5@example.com' ],
306 [6, '6@example.com' ],
307 [7, '7@example.com' ],
308 [8, '8@example.com' ],
309 [9, '9@example.com' ],
310 [10, '10@example.com' ],
311 ], 'msgmap as expected' );
# Incremental step: move ref_head to $mark4 and run a plain index_sync().
# Collect every warning raised while this handler is in effect.
315 local $SIG{__WARN__} = sub { push @warn, @_ };
316 my %config = %$ibx_config;
317 my $ibx = PublicInbox::Inbox->new(\%config);
318 # mark4 A delete of an older message
319 $ibx->{ref_head} = $mark4;
320 my $im = PublicInbox::V2Writable->new($ibx);
321 eval { $im->index_sync() };
322 is($@, '', 'no error from reindexing without msgmap');
323 is_deeply(\@warn, [], 'no warnings');
# Article 5 is now expected to be gone as well, leaving the same set of
# entries as the baseline map built after the initial import.
325 my ($min, $max) = $ibx->mm->minmax;
326 is($min, 1, 'min as expected');
327 is($max, 10, 'max as expected');
328 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
329 is_deeply($ibx->mm->msg_range(\$min, $max),
331 [1, '1@example.com' ],
332 [2, '2@example.com' ],
333 [3, '3@example.com' ],
334 [6, '6@example.com' ],
335 [7, '7@example.com' ],
336 [8, '8@example.com' ],
337 [9, '9@example.com' ],
338 [10, '10@example.com' ],
339 ], 'msgmap as expected' );
343 # Another incremental indexing test
# This sequence starts directly at $mark2 with no msgmap and no Xapian
# directory, so the add and the delete of article 4 are seen in one pass.
# unlink gets explicit parentheses so the description is passed to ok() as
# the test name instead of being taken as a second filename to delete.
344 ok(unlink("$inboxdir/msgmap.sqlite3"), 'remove msgmap');
# NOTE(review): the statement that deletes $xap is not visible in this view.
346 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning raised while this handler is in effect.
349 local $SIG{__WARN__} = sub { push @warn, @_ };
350 my %config = %$ibx_config;
351 my $ibx = PublicInbox::Inbox->new(\%config);
352 # mark2 an add and its delete in the same index_sync
353 $ibx->{ref_head} = $mark2;
354 my $im = PublicInbox::V2Writable->new($ibx);
355 eval { $im->index_sync() };
356 is($@, '', 'no error from reindexing without msgmap');
357 is_deeply(\@warn, [], 'no warnings');
# Article 4 must not be listed (max is 3), yet the high-water mark is still
# expected to be 4.
359 my ($min, $max) = $ibx->mm->minmax;
360 is($min, 1, 'min as expected');
361 is($max, 3, 'max as expected');
362 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
363 is_deeply($ibx->mm->msg_range(\$min, $max),
365 [1, '1@example.com' ],
366 [2, '2@example.com' ],
367 [3, '3@example.com' ],
368 ], 'msgmap as expected' );
# Incremental step: move ref_head to $mark3 and run a plain index_sync().
# Collect every warning raised while this handler is in effect.
372 local $SIG{__WARN__} = sub { push @warn, @_ };
373 my %config = %$ibx_config;
374 my $ibx = PublicInbox::Inbox->new(\%config);
375 # mark3 adds following the delete at mark2
376 $ibx->{ref_head} = $mark3;
377 my $im = PublicInbox::V2Writable->new($ibx);
378 eval { $im->index_sync() };
379 is($@, '', 'no error from reindexing without msgmap');
380 is_deeply(\@warn, [], 'no warnings');
# Articles 5..10 are expected to have appeared; number 4 is still absent.
382 my ($min, $max) = $ibx->mm->minmax;
383 is($min, 1, 'min as expected');
384 is($max, 10, 'max as expected');
385 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
386 is_deeply($ibx->mm->msg_range(\$min, $max),
388 [1, '1@example.com' ],
389 [2, '2@example.com' ],
390 [3, '3@example.com' ],
391 [5, '5@example.com' ],
392 [6, '6@example.com' ],
393 [7, '7@example.com' ],
394 [8, '8@example.com' ],
395 [9, '9@example.com' ],
396 [10, '10@example.com' ],
397 ], 'msgmap as expected' );
# Incremental step: move ref_head to $mark4 and run a plain index_sync().
# Collect every warning raised while this handler is in effect.
401 local $SIG{__WARN__} = sub { push @warn, @_ };
402 my %config = %$ibx_config;
403 my $ibx = PublicInbox::Inbox->new(\%config);
404 # mark4 A delete of an older message
405 $ibx->{ref_head} = $mark4;
406 my $im = PublicInbox::V2Writable->new($ibx);
407 eval { $im->index_sync() };
408 is($@, '', 'no error from reindexing without msgmap');
409 is_deeply(\@warn, [], 'no warnings');
# Article 5 is now expected to be gone as well, leaving the same set of
# entries as the baseline map built after the initial import.
411 my ($min, $max) = $ibx->mm->minmax;
412 is($min, 1, 'min as expected');
413 is($max, 10, 'max as expected');
414 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
415 is_deeply($ibx->mm->msg_range(\$min, $max),
417 [1, '1@example.com' ],
418 [2, '2@example.com' ],
419 [3, '3@example.com' ],
420 [6, '6@example.com' ],
421 [7, '7@example.com' ],
422 [8, '8@example.com' ],
423 [9, '9@example.com' ],
424 [10, '10@example.com' ],
425 ], 'msgmap as expected' );
# $check_rethread: overwrite the thread id of every row in the `over` table
# with one value, run index_sync with reindex + rethread, then verify that
# no row keeps a tid at or below that value and that the number of rows
# with num > 0 is unchanged.
# NOTE(review): callers pass a label string; how it is used is not visible
# in this view.
428 my $check_rethread = sub {
# Collect every warning raised while this handler is in effect.
431 local $SIG{__WARN__} = sub { push @warn, @_ };
432 my %config = %$ibx_config;
433 my $ibx = PublicInbox::Inbox->new(\%config);
# Open the same SQLite file through OverIdx to get a handle we can UPDATE.
434 my $f = $ibx->over->{dbh}->sqlite_db_filename;
435 my $over = PublicInbox::OverIdx->new($f);
436 my $dbh = $over->dbh;
# Returns the tid column of all rows with num > 0, in ascending tid order.
437 my $non_ghost_tids = sub {
438 $dbh->selectall_arrayref(<<'');
439 SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC
442 my $before = $non_ghost_tids->();
# presumably the current value of the 'thread' counter -- confirm in OverIdx
445 my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread');
# No WHERE clause: every row gets the same tid.
446 my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid);
447 diag "messing up all threads with tid=$tid";
449 my $v2w = PublicInbox::V2Writable->new($ibx);
# Progress output is captured in @pr and otherwise ignored.
451 my $pr = sub { push @pr, @_ };
452 $v2w->index_sync({reindex => 1, rethread => 1, -progress => $pr});
453 # diag "@pr"; # nobody cares
454 is_deeply(\@warn, [], 'no warnings on reindex + rethread');
# Count rows whose tid is still <= the value written above; expect none.
456 my @n = $dbh->selectrow_array(<<EOS, undef, $tid);
457 SELECT COUNT(*) FROM over WHERE tid <= ?
459 is_deeply(\@n, [ 0 ], 'rethread dropped old threadids');
460 my $after = $non_ghost_tids->();
# Both lists are sorted ascending, so comparing the smallest new tid with
# the largest old tid covers every pair.
461 ok($after->[0]->[0] > $before->[-1]->[0],
462 'all tids greater than before');
463 is(scalar @$after, scalar @$before, 'thread count unchanged');
# First run, before the multi-header message is added further down.
466 $check_rethread->('no-monster');
468 # A real example from linux-renesas-soc on lore where a 3-headed monster
469 # of a message has 3 sets of common headers. Another normal message
470 # previously existed with a single Message-ID that conflicts with one
471 # of the Message-IDs in the 3-headed monster.
# Collect every warning raised while this handler is in effect.
474 local $SIG{__WARN__} = sub { push @warn, @_ };
475 my %config = %$ibx_config;
476 $config{indexlevel} = 'medium';
477 my $ibx = PublicInbox::Inbox->new(\%config);
478 my $im = PublicInbox::V2Writable->new($ibx);
# $m3: one message carrying three Subject/Date/Message-ID header sets
# ("uno", "dos", "tres").
479 my $m3 = PublicInbox::Eml->new(<<'EOF');
480 Date: Tue, 24 May 2016 14:34:22 -0700 (PDT)
481 Message-Id: <20160524.143422.552507610109476444.d@example.com>
484 Subject: Re: [PATCH v2 2/2] uno
485 From: <f@example.com>
486 In-Reply-To: <1463825855-7363-2-git-send-email-y@example.com>
487 References: <1463825855-7363-1-git-send-email-y@example.com>
488 <1463825855-7363-2-git-send-email-y@example.com>
489 Date: Wed, 25 May 2016 10:01:51 +0900
493 Subject: Re: [PATCH] dos
494 Message-ID: <20160525010150.GD7292@example.com>
495 References: <1463498133-23918-1-git-send-email-g+r@example.com>
496 In-Reply-To: <1463498133-23918-1-git-send-email-g+r@example.com>
500 Subject: [PATCH 12/13] tres
501 Date: Wed, 01 Jun 2016 01:32:35 +0300
502 Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com>
503 In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com>
504 References: <13205049.n7pM8utpHF@wasted.example.com>
506 Somehow we got a message with 3 sets of headers into one
507 message, could've been something broken on the archiver side.
# $m1: an ordinary message whose Message-ID equals the third Message-ID
# of $m3.
510 my $m1 = PublicInbox::Eml->new(<<'EOF');
513 Subject: [PATCH 12/13]
514 Date: Wed, 01 Jun 2016 01:32:35 +0300
515 Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com>
516 In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com>
517 References: <13205049.n7pM8utpHF@wasted.example.com>
519 This is probably one of the original messages
# NOTE(review): the statements that add $m1 and $m3 to the inbox are not
# visible in this view.
# Both the first index and a full reindex must finish without errors or
# warnings despite the reused Message-ID.
526 eval { $im->index_sync() };
527 is($@, '', 'no error from initial indexing');
528 is_deeply(\@warn, [], 'no warnings from initial index');
529 eval { $im->index_sync({reindex=>1}) };
530 is($@, '', 'no error from reindexing after reused Message-ID (x3)');
531 is_deeply(\@warn, [], 'no warnings on reindex');
# Each of the three subjects must match exactly one message, and all three
# matches must be the same article number: %uniq then holds a single key
# whose count is 3.
534 for my $s (qw(uno dos tres)) {
535 my $mset = $ibx->search->mset("s:$s");
536 my $msgs = $ibx->search->mset_to_smsg($ibx, $mset);
537 is(scalar(@$msgs), 1, "only one result for `$s'");
538 $uniq{$msgs->[0]->{num}}++;
540 is_deeply([values %uniq], [3], 'search on different subjects');
# Run the rethread check twice in a row now that the multi-header message
# is in the inbox.
543 # XXX: not deterministic when dealing with ambiguous messages, oh well
544 $check_rethread->('3-headed-monster once');
545 $check_rethread->('3-headed-monster twice');
# Run the -index script with --reindex --xapian-only on the test inbox.
# File descriptor 2 (stderr) is redirected into $err, and PI_CONFIG points
# at /dev/null so no user configuration is read.
547 my $rdr = { 2 => \(my $err = '') };
548 my $env = { PI_CONFIG => '/dev/null' };
549 ok(run_script([qw(-index --reindex --xapian-only), $inboxdir], $env, $rdr),
550 '--xapian-only works');
# Nothing may have been written to stderr.
551 is($err, '', 'no errors from --xapian-only');
# Use lsof(8) to check that this process holds no files under a Xapian
# directory.  Skipped off Linux and when lsof is not installed.
# NOTE(review): skip() implies an enclosing SKIP block not visible here.
554 use PublicInbox::Spawn qw(which);
555 skip 'only testing lsof(8) output on Linux', 1 if $^O ne 'linux';
556 my $lsof = which('lsof') or skip 'no lsof in PATH', 1;
# lsof's stderr is captured in $null_err and never inspected.
557 my $rdr = { 2 => \(my $null_err) };
# Keep only output lines that mention a /xapNN/ path for this PID.
# NOTE(review): xqx presumably runs the command and returns its output
# lines -- confirm in PublicInbox::TestCommon.
558 my @d = grep(m!/xap[0-9]+/!, xqx([$lsof, '-p', $$], undef, $rdr));
559 is_deeply(\@d, [], 'no deleted index files') or diag explain(\@d);