1 # Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
7 use PublicInbox::ContentId qw(content_digest);
8 use File::Path qw(remove_tree);
9 use PublicInbox::TestCommon;
11 require_mods(qw(DBD::SQLite Search::Xapian));
12 use_ok 'PublicInbox::V2Writable';
# Fixture setup: temporary inbox directory, inbox configuration entries,
# and the template message that is re-sent below with different Message-Ids.
13 my ($inboxdir, $for_destroy) = tmpdir();
15 inboxdir => $inboxdir,
16 name => 'test-v2writable',
18 -primary_address => 'test@example.com',
# COPYING is opened relative to the current working directory; abort the
# whole test script if it cannot be read.
22 open my $fh, '<', 'COPYING' or die "can't open COPYING: $!";
26 $agpl or die "AGPL or die :P\n";
# The double quotes are part of the string itself; $phrase is used further
# down as the query for the phrase-search assertions.
27 my $phrase = q("defending all users' freedom");
28 my $mime = PublicInbox::MIME->create(
30 From => 'a@example.com',
31 To => 'test@example.com',
32 Subject => 'this is a subject',
33 Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
# Import messages <1@example.com> .. <10@example.com> and record importer
# marks along the way; the incremental-indexing tests further down assign
# these marks to $ibx->{ref_head}.
39 my ($mark1, $mark2, $mark3, $mark4);
41 my %config = %$ibx_config;
42 my $ibx = PublicInbox::Inbox->new(\%config);
43 my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1});
44 my $im0 = $im->importer();
45 foreach my $i (1..10) {
# The same $mime object is reused; only its Message-Id changes per pass.
46 $mime->header_set('Message-Id', "<$i\@example.com>");
47 ok($im->add($mime), "message $i added");
# Each $markN is whatever get_mark() returns for the importer's {tip}
# at that point in the sequence.
49 $mark1 = $im0->get_mark($im0->{tip});
51 $mark2 = $im0->get_mark($im0->{tip});
# A non-empty string literal is always true in Perl, so this branch always
# runs; the string only serves as a label for the section.
55 if ('test remove later') {
56 $mark3 = $im0->get_mark($im0->{tip});
57 $mime->header_set('Message-Id', "<5\@example.com>");
59 $mark4 = $im0->get_mark($im0->{tip});
# Capture the baseline: $minmax and $msgmap are what every reindex below is
# compared against.
63 $minmax = [ $ibx->mm->minmax ];
64 ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined');
65 is_deeply($minmax, [ 1, 10 ], 'minmax as expected');
66 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# msg_range() is handed a reference to $min, so work on copies rather than
# on the elements of $minmax.
68 my ($min, $max) = @$minmax;
69 $msgmap = $ibx->mm->msg_range(\$min, $max);
# Expected map: article numbers 4 and 5 are absent.
71 [1, '1@example.com' ],
72 [2, '2@example.com' ],
73 [3, '3@example.com' ],
74 [6, '6@example.com' ],
75 [7, '7@example.com' ],
76 [8, '8@example.com' ],
77 [9, '9@example.com' ],
78 [10, '10@example.com' ],
79 ], 'msgmap as expected');
# Reindex the existing inbox in place using a fresh Inbox/V2Writable pair
# built from a copy of the shared configuration.
83 my %config = %$ibx_config;
84 my $ibx = PublicInbox::Inbox->new(\%config);
85 my $im = PublicInbox::V2Writable->new($ibx, 1);
# eval traps any exception so it is reported as a failed assertion on $@
# instead of aborting the script.
86 eval { $im->index_sync({reindex => 1}) };
87 is($@, '', 'no error from reindexing');
# Article numbering must match the baseline captured in $minmax / $msgmap.
91 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
92 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
94 my ($min, $max) = $ibx->mm->minmax;
95 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# $xap is the Xapian directory for the current schema version, located
# directly under the inbox directory.
98 my $xap = "$inboxdir/xap".PublicInbox::Search::SCHEMA_VERSION();
# Precondition for this round: the Xapian directory must not exist.
100 ok(!-d $xap, 'Xapian directories removed');
102 my %config = %$ibx_config;
103 my $ibx = PublicInbox::Inbox->new(\%config);
104 my $im = PublicInbox::V2Writable->new($ibx, 1);
# eval traps any exception so it surfaces through the $@ assertion.
105 eval { $im->index_sync({reindex => 1}) };
106 is($@, '', 'no error from reindexing');
# Reindexing is expected to bring the directory back.
108 ok(-d $xap, 'Xapian directories recreated');
# Article numbering must still match the baseline.
111 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
112 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
114 my ($min, $max) = $ibx->mm->minmax;
115 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# This round also deletes msgmap.sqlite3, so the reindex has to rebuild the
# article-number map as well as the Xapian directory.
118 ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap');
120 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning into @warn so the test can assert none were emitted.
123 local $SIG{__WARN__} = sub { push @warn, @_ };
124 my %config = %$ibx_config;
125 my $ibx = PublicInbox::Inbox->new(\%config);
126 my $im = PublicInbox::V2Writable->new($ibx, 1);
127 eval { $im->index_sync({reindex => 1}) };
128 is($@, '', 'no error from reindexing without msgmap');
129 is(scalar(@warn), 0, 'no warnings from reindexing');
131 ok(-d $xap, 'Xapian directories recreated');
# The rebuilt map must reproduce the baseline numbering exactly.
133 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
134 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
136 my ($min, $max) = $ibx->mm->minmax;
137 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Same rebuild-from-nothing round as before, additionally checking phrase
# search and recording the on-disk index size for later comparison.
141 ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap');
143 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning into @warn so the test can assert none were emitted.
146 local $SIG{__WARN__} = sub { push @warn, @_ };
147 my %config = %$ibx_config;
148 my $ibx = PublicInbox::Inbox->new(\%config);
149 my $im = PublicInbox::V2Writable->new($ibx, 1);
150 eval { $im->index_sync({reindex => 1}) };
151 is($@, '', 'no error from reindexing without msgmap');
152 is_deeply(\@warn, [], 'no warnings');
154 ok(-d $xap, 'Xapian directories recreated');
156 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
157 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# The quoted $phrase must produce at least one hit at this index level.
158 my $mset = $ibx->search->query($phrase, {mset=>1});
159 isnt($mset->size, 0, "phrase search succeeds on indexlevel=full");
# Sum the sizes of all plain files two levels below $xap, keyed by the
# inbox's indexlevel (presumably 'full' here, since $sizes{full} is compared
# later -- confirm).  "-s _" reuses the stat buffer filled by "-f $_".
160 for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
162 my ($min, $max) = $ibx->mm->minmax;
163 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Rebuild from nothing with indexlevel=medium and compare search behaviour
# and on-disk size against the previous (full) round.
166 ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap');
168 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning into @warn so the test can assert none were emitted.
171 local $SIG{__WARN__} = sub { push @warn, @_ };
172 my %config = %$ibx_config;
173 $config{indexlevel} = 'medium';
174 my $ibx = PublicInbox::Inbox->new(\%config);
175 my $im = PublicInbox::V2Writable->new($ibx);
176 eval { $im->index_sync({reindex => 1}) };
177 is($@, '', 'no error from reindexing without msgmap');
178 is_deeply(\@warn, [], 'no warnings');
180 ok(-d $xap, 'Xapian directories recreated');
182 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
183 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
186 # not sure why, but Xapian seems to fallback to terms and
187 # phrase searches still work
# NOTE(review): the remark above disagrees with the assertion below, which
# expects zero phrase-search hits on indexlevel=medium -- confirm which of
# the two is stale.
# Removing the {search} entry presumably forces ->search to build a fresh
# handle -- verify against PublicInbox::Inbox.
188 delete $ibx->{search};
189 my $mset = $ibx->search->query($phrase, {mset=>1});
190 is($mset->size, 0, 'phrase search does not work on medium');
# A query built from $words must still produce at least one hit.
194 my $mset = $ibx->search->query($words, {mset=>1});
195 isnt($mset->size, 0, "normal search works on indexlevel=medium");
# Record this round's on-disk size under the inbox's indexlevel key;
# "-s _" reuses the stat buffer filled by "-f $_".
196 for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
198 ok($sizes{full} > $sizes{medium}, 'medium is smaller than full');
201 my ($min, $max) = $ibx->mm->minmax;
202 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Rebuild from nothing with indexlevel=basic and check that the resulting
# index is smaller than the medium one.
205 ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap');
207 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning into @warn so the test can assert none were emitted.
210 local $SIG{__WARN__} = sub { push @warn, @_ };
211 my %config = %$ibx_config;
212 $config{indexlevel} = 'basic';
213 my $ibx = PublicInbox::Inbox->new(\%config);
214 my $im = PublicInbox::V2Writable->new($ibx);
215 eval { $im->index_sync({reindex => 1}) };
216 is($@, '', 'no error from reindexing without msgmap');
217 is_deeply(\@warn, [], 'no warnings');
219 ok(-d $xap, 'Xapian directories recreated');
221 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
222 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# NOTE(review): only two arguments are passed, so (assuming Test::More's
# isnt($got, $expected, $name)) the string is the value compared against
# rather than the test description; the check passes whenever
# $ibx->search does not stringify to that text.  Confirm the intended
# assertion.
224 isnt($ibx->search, 'no search for basic');
# Record this round's on-disk size under the inbox's indexlevel key;
# "-s _" reuses the stat buffer filled by "-f $_".
226 for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
227 ok($sizes{medium} > $sizes{basic}, 'basic is smaller than medium');
229 my ($min, $max) = $ibx->mm->minmax;
230 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
234 # An incremental indexing test
# Unlike the rounds above, index_sync() is called without {reindex => 1};
# instead $ibx->{ref_head} is set to one of the marks recorded during the
# import, one mark per step.
235 ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap');
237 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning into @warn so the test can assert none were emitted.
240 local $SIG{__WARN__} = sub { push @warn, @_ };
241 my %config = %$ibx_config;
242 my $ibx = PublicInbox::Inbox->new(\%config);
243 # mark1 4 simple additions in the same index_sync
244 $ibx->{ref_head} = $mark1;
245 my $im = PublicInbox::V2Writable->new($ibx);
246 eval { $im->index_sync() };
247 is($@, '', 'no error from reindexing without msgmap');
248 is_deeply(\@warn, [], 'no warnings');
# At mark1 the map is expected to hold exactly articles 1..4.
250 my ($min, $max) = $ibx->mm->minmax;
251 is($min, 1, 'min as expected');
252 is($max, 4, 'max as expected');
253 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
254 is_deeply($ibx->mm->msg_range(\$min, $max),
256 [1, '1@example.com' ],
257 [2, '2@example.com' ],
258 [3, '3@example.com' ],
259 [4, '4@example.com' ],
260 ], 'msgmap as expected' );
# Incremental step to mark2.
# Collect every warning into @warn so the test can assert none were emitted.
264 local $SIG{__WARN__} = sub { push @warn, @_ };
265 my %config = %$ibx_config;
266 my $ibx = PublicInbox::Inbox->new(\%config);
267 # mark2 A delete separated from an add in the same index_sync
268 $ibx->{ref_head} = $mark2;
269 my $im = PublicInbox::V2Writable->new($ibx);
270 eval { $im->index_sync() };
271 is($@, '', 'no error from reindexing without msgmap');
272 is_deeply(\@warn, [], 'no warnings');
# Expected after the delete: max drops to 3 while num_highwater stays at 4.
274 my ($min, $max) = $ibx->mm->minmax;
275 is($min, 1, 'min as expected');
276 is($max, 3, 'max as expected');
277 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
278 is_deeply($ibx->mm->msg_range(\$min, $max),
280 [1, '1@example.com' ],
281 [2, '2@example.com' ],
282 [3, '3@example.com' ],
283 ], 'msgmap as expected' );
# Incremental step to mark3.
# Collect every warning into @warn so the test can assert none were emitted.
287 local $SIG{__WARN__} = sub { push @warn, @_ };
288 my %config = %$ibx_config;
289 my $ibx = PublicInbox::Inbox->new(\%config);
290 # mark3 adds following the delete at mark2
291 $ibx->{ref_head} = $mark3;
292 my $im = PublicInbox::V2Writable->new($ibx);
293 eval { $im->index_sync() };
294 is($@, '', 'no error from reindexing without msgmap');
295 is_deeply(\@warn, [], 'no warnings');
# Expected at mark3: articles 5..10 are present and number 4 stays absent.
297 my ($min, $max) = $ibx->mm->minmax;
298 is($min, 1, 'min as expected');
299 is($max, 10, 'max as expected');
300 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
301 is_deeply($ibx->mm->msg_range(\$min, $max),
303 [1, '1@example.com' ],
304 [2, '2@example.com' ],
305 [3, '3@example.com' ],
306 [5, '5@example.com' ],
307 [6, '6@example.com' ],
308 [7, '7@example.com' ],
309 [8, '8@example.com' ],
310 [9, '9@example.com' ],
311 [10, '10@example.com' ],
312 ], 'msgmap as expected' );
# Incremental step to mark4.
# Collect every warning into @warn so the test can assert none were emitted.
316 local $SIG{__WARN__} = sub { push @warn, @_ };
317 my %config = %$ibx_config;
318 my $ibx = PublicInbox::Inbox->new(\%config);
319 # mark4 A delete of an older message
320 $ibx->{ref_head} = $mark4;
321 my $im = PublicInbox::V2Writable->new($ibx);
322 eval { $im->index_sync() };
323 is($@, '', 'no error from reindexing without msgmap');
324 is_deeply(\@warn, [], 'no warnings');
# Expected at mark4: article 5 is gone as well; min, max and num_highwater
# keep the values they had at mark3.
326 my ($min, $max) = $ibx->mm->minmax;
327 is($min, 1, 'min as expected');
328 is($max, 10, 'max as expected');
329 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
330 is_deeply($ibx->mm->msg_range(\$min, $max),
332 [1, '1@example.com' ],
333 [2, '2@example.com' ],
334 [3, '3@example.com' ],
335 [6, '6@example.com' ],
336 [7, '7@example.com' ],
337 [8, '8@example.com' ],
338 [9, '9@example.com' ],
339 [10, '10@example.com' ],
340 ], 'msgmap as expected' );
344 # Another incremental indexing test
# This sequence starts from an empty state and goes straight to mark2,
# without a separate index_sync at mark1 first.
345 ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap');
347 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning into @warn so the test can assert none were emitted.
350 local $SIG{__WARN__} = sub { push @warn, @_ };
351 my %config = %$ibx_config;
352 my $ibx = PublicInbox::Inbox->new(\%config);
353 # mark2 an add and its delete in the same index_sync
354 $ibx->{ref_head} = $mark2;
355 my $im = PublicInbox::V2Writable->new($ibx);
356 eval { $im->index_sync() };
357 is($@, '', 'no error from reindexing without msgmap');
358 is_deeply(\@warn, [], 'no warnings');
# Expected: only articles 1..3 are mapped, yet num_highwater is 4.
360 my ($min, $max) = $ibx->mm->minmax;
361 is($min, 1, 'min as expected');
362 is($max, 3, 'max as expected');
363 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
364 is_deeply($ibx->mm->msg_range(\$min, $max),
366 [1, '1@example.com' ],
367 [2, '2@example.com' ],
368 [3, '3@example.com' ],
369 ], 'msgmap as expected' );
# Second sequence, incremental step to mark3.
# Collect every warning into @warn so the test can assert none were emitted.
373 local $SIG{__WARN__} = sub { push @warn, @_ };
374 my %config = %$ibx_config;
375 my $ibx = PublicInbox::Inbox->new(\%config);
376 # mark3 adds following the delete at mark2
377 $ibx->{ref_head} = $mark3;
378 my $im = PublicInbox::V2Writable->new($ibx);
379 eval { $im->index_sync() };
380 is($@, '', 'no error from reindexing without msgmap');
381 is_deeply(\@warn, [], 'no warnings');
# Expected at mark3: articles 5..10 are present and number 4 stays absent.
383 my ($min, $max) = $ibx->mm->minmax;
384 is($min, 1, 'min as expected');
385 is($max, 10, 'max as expected');
386 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
387 is_deeply($ibx->mm->msg_range(\$min, $max),
389 [1, '1@example.com' ],
390 [2, '2@example.com' ],
391 [3, '3@example.com' ],
392 [5, '5@example.com' ],
393 [6, '6@example.com' ],
394 [7, '7@example.com' ],
395 [8, '8@example.com' ],
396 [9, '9@example.com' ],
397 [10, '10@example.com' ],
398 ], 'msgmap as expected' );
# Second sequence, incremental step to mark4.
# Collect every warning into @warn so the test can assert none were emitted.
402 local $SIG{__WARN__} = sub { push @warn, @_ };
403 my %config = %$ibx_config;
404 my $ibx = PublicInbox::Inbox->new(\%config);
405 # mark4 A delete of an older message
406 $ibx->{ref_head} = $mark4;
407 my $im = PublicInbox::V2Writable->new($ibx);
408 eval { $im->index_sync() };
409 is($@, '', 'no error from reindexing without msgmap');
410 is_deeply(\@warn, [], 'no warnings');
# Expected at mark4: article 5 is gone as well; min, max and num_highwater
# keep the values they had at mark3.
412 my ($min, $max) = $ibx->mm->minmax;
413 is($min, 1, 'min as expected');
414 is($max, 10, 'max as expected');
415 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
416 is_deeply($ibx->mm->msg_range(\$min, $max),
418 [1, '1@example.com' ],
419 [2, '2@example.com' ],
420 [3, '3@example.com' ],
421 [6, '6@example.com' ],
422 [7, '7@example.com' ],
423 [8, '8@example.com' ],
424 [9, '9@example.com' ],
425 [10, '10@example.com' ],
426 ], 'msgmap as expected' );
429 # A real example from linux-renesas-soc on lore where a 3-headed monster
430 # of a message has 3 sets of common headers. Another normal message
431 # previously existed with a single Message-ID that conflicts with one
432 # of the Message-IDs in the 3-headed monster.
#
# Flow of this test: build both messages from literal text, run an initial
# index_sync() followed by a {reindex => 1} pass (each must finish without
# errors or warnings), then search for each of the three subjects.  Every
# search must return exactly one result, and all three results must share
# the same article number, which is what the final %uniq check verifies.
# Warnings are collected into @warn so the test can assert none occurred.
435 local $SIG{__WARN__} = sub { push @warn, @_ };
436 my %config = %$ibx_config;
437 $config{indexlevel} = 'medium';
438 my $ibx = PublicInbox::Inbox->new(\%config);
439 my $im = PublicInbox::V2Writable->new($ibx);
# $m3 is the message carrying three header sets (subjects uno/dos/tres);
# the heredoc is single-quoted, so nothing in it is interpolated.
440 my $m3 = PublicInbox::MIME->new(<<'EOF');
441 Date: Tue, 24 May 2016 14:34:22 -0700 (PDT)
442 Message-Id: <20160524.143422.552507610109476444.d@example.com>
445 Subject: Re: [PATCH v2 2/2] uno
446 From: <f@example.com>
447 In-Reply-To: <1463825855-7363-2-git-send-email-y@example.com>
448 References: <1463825855-7363-1-git-send-email-y@example.com>
449 <1463825855-7363-2-git-send-email-y@example.com>
450 Date: Wed, 25 May 2016 10:01:51 +0900
454 Subject: Re: [PATCH] dos
455 Message-ID: <20160525010150.GD7292@example.com>
456 References: <1463498133-23918-1-git-send-email-g+r@example.com>
457 In-Reply-To: <1463498133-23918-1-git-send-email-g+r@example.com>
461 Subject: [PATCH 12/13] tres
462 Date: Wed, 01 Jun 2016 01:32:35 +0300
463 Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com>
464 In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com>
465 References: <13205049.n7pM8utpHF@wasted.example.com>
467 Somehow we got a message with 3 sets of headers into one
468 message, could've been something broken on the archiver side.
471 my $m1 = PublicInbox::MIME->new(<<'EOF');
474 Subject: [PATCH 12/13]
475 Date: Wed, 01 Jun 2016 01:32:35 +0300
476 Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com>
477 In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com>
478 References: <13205049.n7pM8utpHF@wasted.example.com>
480 This is probably one of the original messages
487 eval { $im->index_sync() };
488 is($@, '', 'no error from initial indexing');
489 is_deeply(\@warn, [], 'no warnings from initial index');
490 eval { $im->index_sync({reindex=>1}) };
491 is($@, '', 'no error from reindexing after reused Message-ID (x3)');
492 is_deeply(\@warn, [], 'no warnings on reindex');
495 for my $s (qw(uno dos tres)) {
496 my $msgs = $ibx->search->query("s:$s");
497 is(scalar(@$msgs), 1, "only one result for `$s'");
498 $uniq{$msgs->[0]->{num}}++;
500 is_deeply([values %uniq], [3], 'search on different subjects');