1 # Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
6 use PublicInbox::ContentHash qw(content_digest);
7 use File::Path qw(remove_tree);
8 use PublicInbox::TestCommon;
11 require_mods(qw(DBD::SQLite Search::Xapian));
12 use_ok 'PublicInbox::SearchIdx';
13 use_ok 'PublicInbox::Import';
14 use_ok 'PublicInbox::OverIdx';
# Fixture setup: a temporary inbox directory plus the inbox settings
# (directory, name, primary address) used to construct inboxes below.
# NOTE(review): the statement that opens/closes this settings hash is not
# visible in this excerpt; presumably it is the $ibx_config hashref that
# each stanza copies into its own %config -- confirm.
15 my ($inboxdir, $for_destroy) = tmpdir();
17 inboxdir => $inboxdir,
18 name => 'test-v1reindex',
19 -primary_address => 'test@example.com',
# Template message; the import loop rewrites its Message-Id for every mail.
# Only part of the heredoc (Subject and Date headers) is visible here.
23 my $mime = PublicInbox::Eml->new(<<'EOF');
26 Subject: this is a subject
27 Date: Fri, 02 Oct 1993 00:00:00 +0000
# Marks returned by $im->get_mark for the importer tip at four points of the
# import; later stanzas hand them to index_sync as {ref => $markN}.
33 my ($mark1, $mark2, $mark3, $mark4);
# Import stanza: work on a private copy of the settings and import through
# PublicInbox::Import bound to the inbox's git repository.
35 my %config = %$ibx_config;
36 my $ibx = PublicInbox::Inbox->new(\%config);
37 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
# Ten messages that differ only in their Message-Id (<1@example.com> ...).
39 foreach my $i (1..10) {
40 $mime->header_set('Message-Id', "<$i\@example.com>");
41 ok($im->add($mime), "message $i added");
# NOTE(review): the statements separating the get_mark calls (and the
# closing braces) are not visible in this excerpt, so what happens between
# consecutive marks cannot be read off here.
43 $mark1 = $im->get_mark($im->{tip});
45 $mark2 = $im->get_mark($im->{tip});
# A non-empty string is always true: the condition only labels the block.
49 if ('test remove later') {
50 $mark3 = $im->get_mark($im->{tip});
51 $mime->header_set('Message-Id', "<5\@example.com>");
53 $mark4 = $im->get_mark($im->{tip});
# Initial indexing pass: index_sync with no options must not die.  The
# msgmap bounds ($minmax) and contents ($msgmap) captured here are the
# baseline that the later stanzas compare against.
57 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
58 eval { $rw->index_sync() };
59 is($@, '', 'no error from indexing');
61 $minmax = [ $ibx->mm->minmax ];
62 ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined');
63 is_deeply($minmax, [ 1, 10 ], 'minmax as expected');
64 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# msg_range receives the lower bound by reference.
66 my ($min, $max) = @$minmax;
67 $msgmap = $ibx->mm->msg_range(\$min, $max);
# Expected map: article numbers 4 and 5 are absent (presumably the messages
# removed during import -- the removal calls are not visible here).
69 [1, '1@example.com' ],
70 [2, '2@example.com' ],
71 [3, '3@example.com' ],
72 [6, '6@example.com' ],
73 [7, '7@example.com' ],
74 [8, '8@example.com' ],
75 [9, '9@example.com' ],
76 [10, '10@example.com' ],
77 ], 'msgmap as expected');
# Reindex on top of the existing index: index_sync({reindex => 1}) must not
# die, and num_highwater and the msgmap contents must equal the baseline.
81 my %config = %$ibx_config;
82 my $ibx = PublicInbox::Inbox->new(\%config);
83 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
84 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
85 eval { $rw->index_sync({reindex => 1}) };
86 is($@, '', 'no error from reindexing');
89 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# Re-read the bounds from the freshly opened msgmap before comparing.
91 my ($min, $max) = $ibx->mm->minmax;
92 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Path of the Xapian directory: "xapian" followed by the schema version,
# under $inboxdir/public-inbox.  Shared by all stanzas below.
95 my $xap = "$inboxdir/public-inbox/xapian".PublicInbox::Search::SCHEMA_VERSION();
# NOTE(review): the call that deletes $xap is not visible in this excerpt;
# this line only asserts that the directory is gone.
97 ok(!-d $xap, 'Xapian directories removed');
# Reindex with the Xapian directory missing: it must be recreated while the
# msgmap bounds, high-water mark and contents stay equal to the baseline.
99 my %config = %$ibx_config;
100 my $ibx = PublicInbox::Inbox->new(\%config);
101 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
102 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
104 eval { $rw->index_sync({reindex => 1}) };
105 is($@, '', 'no error from reindexing');
107 ok(-d $xap, 'Xapian directories recreated');
110 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
111 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
113 my ($min, $max) = $ibx->mm->minmax;
114 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Reindex with msgmap.sqlite3 deleted and the Xapian directory gone:
# index_sync({reindex => 1}) must rebuild both without dying or warning,
# and the rebuilt msgmap must equal the baseline ($minmax / $msgmap).
117 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
# NOTE(review): the call that deletes $xap is not visible in this excerpt.
119 ok(!-d $xap, 'Xapian directories removed again');
# Capture every warning raised while indexing so none go unnoticed.
122 local $SIG{__WARN__} = sub { push @warn, @_ };
123 my %config = %$ibx_config;
124 my $ibx = PublicInbox::Inbox->new(\%config);
125 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
126 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
127 eval { $rw->index_sync({reindex => 1}) };
128 is($@, '', 'no error from reindexing without msgmap');
# Compare the captured list itself (not just its size): on failure the
# warnings are printed, and the form matches the other stanzas.
129 is_deeply(\@warn, [], 'no warnings from reindexing');
131 ok(-d $xap, 'Xapian directories recreated');
133 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
134 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
136 my ($min, $max) = $ibx->mm->minmax;
137 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Second run of the "msgmap deleted, Xapian directory gone" scenario:
# reindex must succeed without warnings and reproduce the baseline msgmap.
140 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
# NOTE(review): the call that deletes $xap is not visible in this excerpt.
142 ok(!-d $xap, 'Xapian directories removed again');
# Capture warnings raised during indexing for the 'no warnings' check.
145 local $SIG{__WARN__} = sub { push @warn, @_ };
146 my %config = %$ibx_config;
147 my $ibx = PublicInbox::Inbox->new(\%config);
148 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
149 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
150 eval { $rw->index_sync({reindex => 1}) };
151 is($@, '', 'no error from reindexing without msgmap');
152 is_deeply(\@warn, [], 'no warnings');
154 ok(-d $xap, 'Xapian directories recreated');
156 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
157 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# Unlike the neighbouring stanzas, the range bounds here come from the
# saved baseline $minmax rather than a fresh minmax query.
159 my ($min, $max) = @$minmax;
160 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Rebuild from scratch with indexlevel set to 'medium': reindex must succeed
# without warnings, reproduce the baseline msgmap, and a Xapian query for
# 'hello world' must return at least one result.
163 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
# NOTE(review): the call that deletes $xap is not visible in this excerpt.
165 ok(!-d $xap, 'Xapian directories removed again');
168 local $SIG{__WARN__} = sub { push @warn, @_ };
169 my %config = %$ibx_config;
# Override only in this stanza's private copy of the settings.
170 $config{indexlevel} = 'medium';
171 my $ibx = PublicInbox::Inbox->new(\%config);
172 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
173 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
174 eval { $rw->index_sync({reindex => 1}) };
175 is($@, '', 'no error from reindexing without msgmap');
176 is_deeply(\@warn, [], 'no warnings');
178 ok(-d $xap, 'Xapian directories recreated');
180 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
181 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# Search must be usable at this index level.
182 my $mset = $ibx->search->mset('hello world');
183 isnt($mset->size, 0, 'got Xapian search results');
185 my ($min, $max) = $ibx->mm->minmax;
186 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Rebuild from scratch with indexlevel set to 'basic': reindex must succeed
# without warnings and reproduce the baseline msgmap, and the inbox must
# not offer a search object.
189 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
# NOTE(review): the call that deletes $xap is not visible in this excerpt.
191 ok(!-d $xap, 'Xapian directories removed again');
194 local $SIG{__WARN__} = sub { push @warn, @_ };
195 my %config = %$ibx_config;
# Override only in this stanza's private copy of the settings.
196 $config{indexlevel} = 'basic';
197 my $ibx = PublicInbox::Inbox->new(\%config);
198 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
199 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
200 eval { $rw->index_sync({reindex => 1}) };
201 is($@, '', 'no error from reindexing without msgmap');
202 is_deeply(\@warn, [], 'no warnings');
# The directory is still expected to exist at this level.
204 ok(-d $xap, 'Xapian directories recreated');
206 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
207 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# Assert that search is unavailable.  The previous two-argument isnt()
# treated the description as the expected value, so it passed for any
# result and left the test unnamed.
# NOTE(review): assumes $ibx->search yields a false value when no Xapian
# index exists -- confirm against PublicInbox::Inbox.
208 ok(!$ibx->search, 'no search for basic');
210 my ($min, $max) = $ibx->mm->minmax;
211 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
214 # Upgrade the existing 'basic' index to 'medium' in place (nothing is
215 # removed first).  Note: changing indexlevels is not yet supported in v2,
216 # and may not be without more effort.
220 local $SIG{__WARN__} = sub { push @warn, @_ };
221 my %config = %$ibx_config;
222 $config{indexlevel} = 'medium';
223 my $ibx = PublicInbox::Inbox->new(\%config);
# No Import object is created in this stanza; only the indexer is needed.
224 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
225 eval { $rw->index_sync({reindex => 1}) };
226 is($@, '', 'no error from indexing');
227 is_deeply(\@warn, [], 'no warnings');
# reopen is called on the search object before querying, presumably so it
# sees the data written by the indexer above -- confirm.
228 my $mset = $ibx->search->reopen->mset('hello world');
229 isnt($mset->size, 0, 'search OK after basic -> medium');
231 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
233 my ($min, $max) = $ibx->mm->minmax;
234 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
237 # An incremental indexing test: start from nothing and index up to each
237 # recorded mark in turn, checking the msgmap after every step.
238 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
# NOTE(review): the call that deletes $xap is not visible in this excerpt.
240 ok(!-d $xap, 'Xapian directories removed again');
243 local $SIG{__WARN__} = sub { push @warn, @_ };
244 my %config = %$ibx_config;
245 my $ibx = PublicInbox::Inbox->new(\%config);
246 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
247 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
248 # mark1: 4 simple additions in the same index_sync
249 eval { $rw->index_sync({ref => $mark1}) };
250 is($@, '', 'no error from reindexing without msgmap');
251 is_deeply(\@warn, [], 'no warnings');
# After mark1 the map holds article numbers 1..4 and the high-water mark is 4.
253 my ($min, $max) = $ibx->mm->minmax;
254 is($min, 1, 'min as expected');
255 is($max, 4, 'max as expected');
256 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
257 is_deeply($ibx->mm->msg_range(\$min, $max),
259 [1, '1@example.com' ],
260 [2, '2@example.com' ],
261 [3, '3@example.com' ],
262 [4, '4@example.com' ],
263 ], 'msgmap as expected' );
# Incremental step to mark2, continuing from the index built up to mark1.
267 local $SIG{__WARN__} = sub { push @warn, @_ };
268 my %config = %$ibx_config;
269 my $ibx = PublicInbox::Inbox->new(\%config);
270 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
271 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
272 # mark2: a delete indexed in a separate index_sync from its add
273 eval { $rw->index_sync({ref => $mark2}) };
274 is($@, '', 'no error from reindexing without msgmap');
275 is_deeply(\@warn, [], 'no warnings');
# Article 4 is gone, so max drops to 3, yet num_highwater is still
# expected to be 4.
277 my ($min, $max) = $ibx->mm->minmax;
278 is($min, 1, 'min as expected');
279 is($max, 3, 'max as expected');
280 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
281 is_deeply($ibx->mm->msg_range(\$min, $max),
283 [1, '1@example.com' ],
284 [2, '2@example.com' ],
285 [3, '3@example.com' ],
286 ], 'msgmap as expected' );
# Incremental step to mark3, continuing from the index built up to mark2.
290 local $SIG{__WARN__} = sub { push @warn, @_ };
291 my %config = %$ibx_config;
292 my $ibx = PublicInbox::Inbox->new(\%config);
293 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
294 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
295 # mark3: adds following the delete at mark2
296 eval { $rw->index_sync({ref => $mark3}) };
297 is($@, '', 'no error from reindexing without msgmap');
298 is_deeply(\@warn, [], 'no warnings');
# Articles 5..10 are now present; number 4 is not reused.
300 my ($min, $max) = $ibx->mm->minmax;
301 is($min, 1, 'min as expected');
302 is($max, 10, 'max as expected');
303 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
304 is_deeply($ibx->mm->msg_range(\$min, $max),
306 [1, '1@example.com' ],
307 [2, '2@example.com' ],
308 [3, '3@example.com' ],
309 [5, '5@example.com' ],
310 [6, '6@example.com' ],
311 [7, '7@example.com' ],
312 [8, '8@example.com' ],
313 [9, '9@example.com' ],
314 [10, '10@example.com' ],
315 ], 'msgmap as expected' );
# Final incremental step to mark4; the result must match the map produced
# by a full index (numbers 4 and 5 absent).
319 local $SIG{__WARN__} = sub { push @warn, @_ };
320 my %config = %$ibx_config;
321 my $ibx = PublicInbox::Inbox->new(\%config);
322 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
323 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
324 # mark4: a delete of an older message
325 eval { $rw->index_sync({ref => $mark4}) };
326 is($@, '', 'no error from reindexing without msgmap');
327 is_deeply(\@warn, [], 'no warnings');
# Article 5 disappears while the bounds and high-water mark stay at 1/10/10.
329 my ($min, $max) = $ibx->mm->minmax;
330 is($min, 1, 'min as expected');
331 is($max, 10, 'max as expected');
332 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
333 is_deeply($ibx->mm->msg_range(\$min, $max),
335 [1, '1@example.com' ],
336 [2, '2@example.com' ],
337 [3, '3@example.com' ],
338 [6, '6@example.com' ],
339 [7, '7@example.com' ],
340 [8, '8@example.com' ],
341 [9, '9@example.com' ],
342 [10, '10@example.com' ],
343 ], 'msgmap as expected' );
347 # Another incremental indexing test: start from nothing again, but make
347 # the first index_sync go straight to mark2 (skipping mark1).
348 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
# NOTE(review): the call that deletes $xap is not visible in this excerpt.
350 ok(!-d $xap, 'Xapian directories removed again');
353 local $SIG{__WARN__} = sub { push @warn, @_ };
354 my %config = %$ibx_config;
355 my $ibx = PublicInbox::Inbox->new(\%config);
356 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
357 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
358 # mark2: an add and its delete in the same index_sync
359 eval { $rw->index_sync({ref => $mark2}) };
360 is($@, '', 'no error from reindexing without msgmap');
361 is_deeply(\@warn, [], 'no warnings');
# Same expectations as when the add and delete were indexed separately:
# max is 3 while num_highwater is 4.
363 my ($min, $max) = $ibx->mm->minmax;
364 is($min, 1, 'min as expected');
365 is($max, 3, 'max as expected');
366 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
367 is_deeply($ibx->mm->msg_range(\$min, $max),
369 [1, '1@example.com' ],
370 [2, '2@example.com' ],
371 [3, '3@example.com' ],
372 ], 'msgmap as expected' );
# Second incremental run, step to mark3 (continuing from mark2).
376 local $SIG{__WARN__} = sub { push @warn, @_ };
377 my %config = %$ibx_config;
378 my $ibx = PublicInbox::Inbox->new(\%config);
379 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
380 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
381 # mark3: adds following the delete at mark2
382 eval { $rw->index_sync({ref => $mark3}) };
383 is($@, '', 'no error from reindexing without msgmap');
384 is_deeply(\@warn, [], 'no warnings');
# Articles 5..10 are now present; number 4 is not reused.
386 my ($min, $max) = $ibx->mm->minmax;
387 is($min, 1, 'min as expected');
388 is($max, 10, 'max as expected');
389 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
390 is_deeply($ibx->mm->msg_range(\$min, $max),
392 [1, '1@example.com' ],
393 [2, '2@example.com' ],
394 [3, '3@example.com' ],
395 [5, '5@example.com' ],
396 [6, '6@example.com' ],
397 [7, '7@example.com' ],
398 [8, '8@example.com' ],
399 [9, '9@example.com' ],
400 [10, '10@example.com' ],
401 ], 'msgmap as expected' );
# Second incremental run, final step to mark4; the result must match the
# map produced by a full index (numbers 4 and 5 absent).
405 local $SIG{__WARN__} = sub { push @warn, @_ };
406 my %config = %$ibx_config;
407 my $ibx = PublicInbox::Inbox->new(\%config);
408 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
409 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
410 # mark4: a delete of an older message
411 eval { $rw->index_sync({ref => $mark4}) };
412 is($@, '', 'no error from reindexing without msgmap');
413 is_deeply(\@warn, [], 'no warnings');
# Article 5 disappears while the bounds and high-water mark stay at 1/10/10.
415 my ($min, $max) = $ibx->mm->minmax;
416 is($min, 1, 'min as expected');
417 is($max, 10, 'max as expected');
418 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
419 is_deeply($ibx->mm->msg_range(\$min, $max),
421 [1, '1@example.com' ],
422 [2, '2@example.com' ],
423 [3, '3@example.com' ],
424 [6, '6@example.com' ],
425 [7, '7@example.com' ],
426 [8, '8@example.com' ],
427 [9, '9@example.com' ],
428 [10, '10@example.com' ],
429 ], 'msgmap as expected' );
# Rethread stanza: open the inbox's over database directly, overwrite the
# tid of every row with the current 'thread' counter value, then run
# index_sync with reindex and rethread enabled.  Afterwards no row may keep
# a tid <= that value, the smallest new tid (rows with num > 0, ordered
# ascending) must exceed the largest old one, and the number of such rows
# must be unchanged.
434 local $SIG{__WARN__} = sub { push @warn, @_ };
435 my $ibx = PublicInbox::Inbox->new({ %$ibx_config });
436 my $f = $ibx->over->{dbh}->sqlite_db_filename;
437 my $over = PublicInbox::OverIdx->new($f);
438 my $dbh = $over->dbh;
439 my $non_ghost_tids = sub {
440 $dbh->selectall_arrayref(<<'');
441 SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC
444 my $before = $non_ghost_tids->();
447 my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread');
448 my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid);
450 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
452 my $pr = sub { push @pr, @_ };
453 $rw->index_sync({reindex => 1, rethread => 1, -progress => $pr });
454 my @n = $dbh->selectrow_array(<<EOS, undef, $tid);
455 SELECT COUNT(*) FROM over WHERE tid <= ?
457 is_deeply(\@n, [ 0 ], 'rethread dropped old threadids');
458 my $after = $non_ghost_tids->();
459 ok($after->[0]->[0] > $before->[-1]->[0],
460 'all tids greater than before');
461 is(scalar @$after, scalar @$before, 'thread count unchanged');
# Got value first, expected second: failure output then labels the captured
# warnings as "got", matching the other stanzas.
462 is_deeply(\@warn, [], 'no warnings');
463 # diag "@pr"; # XXX do we care?