1 # Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
6 use PublicInbox::ContentHash qw(content_digest);
7 use File::Path qw(remove_tree);
8 use PublicInbox::TestCommon;
11 require_mods(qw(DBD::SQLite Search::Xapian));
12 use_ok 'PublicInbox::SearchIdx';
13 use_ok 'PublicInbox::Import';
14 use_ok 'PublicInbox::OverIdx';
# Shared fixtures for every section below: the inbox directory returned by
# tmpdir(), the inbox configuration entries, and a template message whose
# Message-Id header is rewritten for each test message.  $mark1..$mark4 are
# assigned from $im->get_mark during the import and are later passed to
# index_sync as the `ref' option.
15 my ($inboxdir, $for_destroy) = tmpdir();
17 inboxdir => $inboxdir,
18 name => 'test-v1reindex',
19 -primary_address => 'test@example.com',
22 my $mime = PublicInbox::Eml->new(<<'EOF');
25 Subject: this is a subject
26 Date: Fri, 02 Oct 1993 00:00:00 +0000
32 my ($mark1, $mark2, $mark3, $mark4);
# Import messages 1..10 (recording marks of the import tip along the way),
# run the first index_sync, and capture $minmax / $msgmap as the baseline
# that the later sections compare against.
34 my %config = %$ibx_config;
35 my $ibx = PublicInbox::Inbox->new(\%config);
36 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
38 foreach my $i (1..10) {
39 $mime->header_set('Message-Id', "<$i\@example.com>");
40 ok($im->add($mime), "message $i added");
42 $mark1 = $im->get_mark($im->{tip});
44 $mark2 = $im->get_mark($im->{tip});
# A non-empty string is always true, so this branch always runs; the
# literal only serves as a label for the branch.
48 if ('test remove later') {
49 $mark3 = $im->get_mark($im->{tip});
50 $mime->header_set('Message-Id', "<5\@example.com>");
52 $mark4 = $im->get_mark($im->{tip});
56 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
57 eval { $rw->index_sync() };
58 is($@, '', 'no error from indexing');
# Baseline article-number range, reused by later sections.
60 $minmax = [ $ibx->mm->minmax ];
61 ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined');
62 is_deeply($minmax, [ 1, 10 ], 'minmax as expected');
63 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
65 my ($min, $max) = @$minmax;
66 $msgmap = $ibx->mm->msg_range(\$min, $max);
# The expected map has no entries for 4 and 5 -- presumably because those
# messages are removed on lines not shown here; confirm against the import.
68 [1, '1@example.com' ],
69 [2, '2@example.com' ],
70 [3, '3@example.com' ],
71 [6, '6@example.com' ],
72 [7, '7@example.com' ],
73 [8, '8@example.com' ],
74 [9, '9@example.com' ],
75 [10, '10@example.com' ],
76 ], 'msgmap as expected');
# Reindex on top of the existing index: num_highwater and the msgmap
# contents must still match the baseline captured by the initial run.
80 my %config = %$ibx_config;
81 my $ibx = PublicInbox::Inbox->new(\%config);
82 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
83 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
84 eval { $rw->index_sync({reindex => 1}) };
85 is($@, '', 'no error from reindexing');
88 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
90 my ($min, $max) = $ibx->mm->minmax;
91 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Reindex after the Xapian directory is gone: the directory must be
# recreated while minmax, num_highwater and msgmap stay at the baseline.
# $xap is the schema-versioned Xapian path and is reused by later sections.
# NOTE(review): the removal itself is on a line not shown here (presumably
# remove_tree($xap)) -- confirm.
94 my $xap = "$inboxdir/public-inbox/xapian".PublicInbox::Search::SCHEMA_VERSION();
96 ok(!-d $xap, 'Xapian directories removed');
98 my %config = %$ibx_config;
99 my $ibx = PublicInbox::Inbox->new(\%config);
100 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
101 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
103 eval { $rw->index_sync({reindex => 1}) };
104 is($@, '', 'no error from reindexing');
106 ok(-d $xap, 'Xapian directories recreated');
109 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
110 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
112 my ($min, $max) = $ibx->mm->minmax;
113 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Reindex with msgmap.sqlite3 deleted and the Xapian directory gone: the
# run must finish without errors or warnings, recreate the Xapian
# directory, and reproduce the baseline minmax / num_highwater / msgmap.
# NOTE(review): the Xapian removal is on a line not shown here -- confirm.
116 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
118 ok(!-d $xap, 'Xapian directories removed again');
# Collect every warning raised while this section runs.
121 local $SIG{__WARN__} = sub { push @warn, @_ };
122 my %config = %$ibx_config;
123 my $ibx = PublicInbox::Inbox->new(\%config);
124 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
125 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
126 eval { $rw->index_sync({reindex => 1}) };
127 is($@, '', 'no error from reindexing without msgmap');
# Compare the captured list itself (as the sibling sections do) rather
# than its length, so a failure prints the offending warning text.
128 is_deeply(\@warn, [], 'no warnings from reindexing');
130 ok(-d $xap, 'Xapian directories recreated');
132 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
133 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
135 my ($min, $max) = $ibx->mm->minmax;
136 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Second run of the "no msgmap, no Xapian directory" reindex: delete
# msgmap.sqlite3 again and require an error-free, warning-free reindex
# that reproduces the baseline.
139 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
141 ok(!-d $xap, 'Xapian directories removed again');
144 local $SIG{__WARN__} = sub { push @warn, @_ };
145 my %config = %$ibx_config;
146 my $ibx = PublicInbox::Inbox->new(\%config);
147 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
148 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
149 eval { $rw->index_sync({reindex => 1}) };
150 is($@, '', 'no error from reindexing without msgmap');
151 is_deeply(\@warn, [], 'no warnings');
153 ok(-d $xap, 'Xapian directories recreated');
155 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
156 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# Here the range bounds come from the saved baseline rather than from a
# fresh minmax call.
158 my ($min, $max) = @$minmax;
159 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Rebuild from scratch with indexlevel overridden to 'medium'.  Besides
# the baseline checks, a Xapian query for 'hello world' must return at
# least one result.
162 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
164 ok(!-d $xap, 'Xapian directories removed again');
167 local $SIG{__WARN__} = sub { push @warn, @_ };
168 my %config = %$ibx_config;
169 $config{indexlevel} = 'medium';
170 my $ibx = PublicInbox::Inbox->new(\%config);
171 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
172 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
173 eval { $rw->index_sync({reindex => 1}) };
174 is($@, '', 'no error from reindexing without msgmap');
175 is_deeply(\@warn, [], 'no warnings');
177 ok(-d $xap, 'Xapian directories recreated');
179 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
180 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
181 my $mset = $ibx->search->query('hello world', {mset=>1});
182 isnt($mset->size, 0, 'got Xapian search results');
184 my ($min, $max) = $ibx->mm->minmax;
185 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
# Rebuild from scratch with indexlevel overridden to 'basic'.  The Xapian
# directory is still expected to exist afterwards, and the msgmap checks
# must match the baseline.
188 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
190 ok(!-d $xap, 'Xapian directories removed again');
193 local $SIG{__WARN__} = sub { push @warn, @_ };
194 my %config = %$ibx_config;
195 $config{indexlevel} = 'basic';
196 my $ibx = PublicInbox::Inbox->new(\%config);
197 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
198 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
199 eval { $rw->index_sync({reindex => 1}) };
200 is($@, '', 'no error from reindexing without msgmap');
201 is_deeply(\@warn, [], 'no warnings');
203 ok(-d $xap, 'Xapian directories recreated');
205 is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
206 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
# NOTE(review): isnt() receives only two arguments here, so the string is
# used as the value $ibx->search must differ from and the test has no
# description.  Presumably the string was meant to be the description of
# a "search is unavailable at basic level" check -- confirm the intended
# assertion before changing it.
207 isnt($ibx->search, 'no search for basic');
209 my ($min, $max) = $ibx->mm->minmax;
210 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
213 # upgrade existing basic to medium
214 # note: changing indexlevels is not yet supported in v2,
215 # and may not be without more effort
# Unlike the surrounding sections, nothing is deleted first: the reindex
# runs with indexlevel 'medium' on top of what the previous section left.
219 local $SIG{__WARN__} = sub { push @warn, @_ };
220 my %config = %$ibx_config;
221 $config{indexlevel} = 'medium';
222 my $ibx = PublicInbox::Inbox->new(\%config);
223 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
224 eval { $rw->index_sync({reindex => 1}) };
225 is($@, '', 'no error from indexing');
226 is_deeply(\@warn, [], 'no warnings');
# The search handle is reopened before querying.
227 my $mset = $ibx->search->reopen->query('hello world', {mset=>1});
228 isnt($mset->size, 0, 'search OK after basic -> medium');
230 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
232 my ($min, $max) = $ibx->mm->minmax;
233 is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
236 # An incremental indexing test
# Start from an empty index and sync only up to $mark1; the following
# sections continue from this state with $mark2, $mark3 and $mark4.
237 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
239 ok(!-d $xap, 'Xapian directories removed again');
242 local $SIG{__WARN__} = sub { push @warn, @_ };
243 my %config = %$ibx_config;
244 my $ibx = PublicInbox::Inbox->new(\%config);
245 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
246 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
247 # mark1: 4 simple additions in the same index_sync
248 eval { $rw->index_sync({ref => $mark1}) };
249 is($@, '', 'no error from reindexing without msgmap');
250 is_deeply(\@warn, [], 'no warnings');
252 my ($min, $max) = $ibx->mm->minmax;
253 is($min, 1, 'min as expected');
254 is($max, 4, 'max as expected');
255 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
256 is_deeply($ibx->mm->msg_range(\$min, $max),
258 [1, '1@example.com' ],
259 [2, '2@example.com' ],
260 [3, '3@example.com' ],
261 [4, '4@example.com' ],
262 ], 'msgmap as expected' );
# Continue the incremental run up to $mark2: message 4 drops out of the
# map.
266 local $SIG{__WARN__} = sub { push @warn, @_ };
267 my %config = %$ibx_config;
268 my $ibx = PublicInbox::Inbox->new(\%config);
269 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
270 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
271 # mark2: a delete separated from an add in the same index_sync
272 eval { $rw->index_sync({ref => $mark2}) };
273 is($@, '', 'no error from reindexing without msgmap');
274 is_deeply(\@warn, [], 'no warnings');
276 my ($min, $max) = $ibx->mm->minmax;
277 is($min, 1, 'min as expected');
278 is($max, 3, 'max as expected');
# num_highwater is expected to stay at 4 although the highest mapped
# number is now 3.
279 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
280 is_deeply($ibx->mm->msg_range(\$min, $max),
282 [1, '1@example.com' ],
283 [2, '2@example.com' ],
284 [3, '3@example.com' ],
285 ], 'msgmap as expected' );
# Continue the incremental run up to $mark3: numbers 5..10 appear in the
# map while 4 stays absent.
289 local $SIG{__WARN__} = sub { push @warn, @_ };
290 my %config = %$ibx_config;
291 my $ibx = PublicInbox::Inbox->new(\%config);
292 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
293 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
294 # mark3 adds following the delete at mark2
295 eval { $rw->index_sync({ref => $mark3}) };
296 is($@, '', 'no error from reindexing without msgmap');
297 is_deeply(\@warn, [], 'no warnings');
299 my ($min, $max) = $ibx->mm->minmax;
300 is($min, 1, 'min as expected');
301 is($max, 10, 'max as expected');
302 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
303 is_deeply($ibx->mm->msg_range(\$min, $max),
305 [1, '1@example.com' ],
306 [2, '2@example.com' ],
307 [3, '3@example.com' ],
308 [5, '5@example.com' ],
309 [6, '6@example.com' ],
310 [7, '7@example.com' ],
311 [8, '8@example.com' ],
312 [9, '9@example.com' ],
313 [10, '10@example.com' ],
314 ], 'msgmap as expected' );
# Finish the incremental run at $mark4: message 5 drops out of the map,
# while min, max and num_highwater keep their values.
318 local $SIG{__WARN__} = sub { push @warn, @_ };
319 my %config = %$ibx_config;
320 my $ibx = PublicInbox::Inbox->new(\%config);
321 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
322 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
323 # mark4 A delete of an older message
324 eval { $rw->index_sync({ref => $mark4}) };
325 is($@, '', 'no error from reindexing without msgmap');
326 is_deeply(\@warn, [], 'no warnings');
328 my ($min, $max) = $ibx->mm->minmax;
329 is($min, 1, 'min as expected');
330 is($max, 10, 'max as expected');
331 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
332 is_deeply($ibx->mm->msg_range(\$min, $max),
334 [1, '1@example.com' ],
335 [2, '2@example.com' ],
336 [3, '3@example.com' ],
337 [6, '6@example.com' ],
338 [7, '7@example.com' ],
339 [8, '8@example.com' ],
340 [9, '9@example.com' ],
341 [10, '10@example.com' ],
342 ], 'msgmap as expected' );
346 # Another incremental indexing test
# This time the fresh index is synced straight to $mark2, skipping the
# separate $mark1 step used by the previous incremental test.
347 ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap');
349 ok(!-d $xap, 'Xapian directories removed again');
352 local $SIG{__WARN__} = sub { push @warn, @_ };
353 my %config = %$ibx_config;
354 my $ibx = PublicInbox::Inbox->new(\%config);
355 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
356 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
357 # mark2: an add and its delete in the same index_sync
358 eval { $rw->index_sync({ref => $mark2}) };
359 is($@, '', 'no error from reindexing without msgmap');
360 is_deeply(\@warn, [], 'no warnings');
362 my ($min, $max) = $ibx->mm->minmax;
363 is($min, 1, 'min as expected');
364 is($max, 3, 'max as expected');
# Number 4 is absent from the map, yet num_highwater is still expected
# to be 4.
365 is($ibx->mm->num_highwater, 4, 'num_highwater as expected');
366 is_deeply($ibx->mm->msg_range(\$min, $max),
368 [1, '1@example.com' ],
369 [2, '2@example.com' ],
370 [3, '3@example.com' ],
371 ], 'msgmap as expected' );
# Continue the second incremental run up to $mark3: numbers 5..10 appear
# in the map while 4 stays absent.
375 local $SIG{__WARN__} = sub { push @warn, @_ };
376 my %config = %$ibx_config;
377 my $ibx = PublicInbox::Inbox->new(\%config);
378 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
379 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
380 # mark3 adds following the delete at mark2
381 eval { $rw->index_sync({ref => $mark3}) };
382 is($@, '', 'no error from reindexing without msgmap');
383 is_deeply(\@warn, [], 'no warnings');
385 my ($min, $max) = $ibx->mm->minmax;
386 is($min, 1, 'min as expected');
387 is($max, 10, 'max as expected');
388 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
389 is_deeply($ibx->mm->msg_range(\$min, $max),
391 [1, '1@example.com' ],
392 [2, '2@example.com' ],
393 [3, '3@example.com' ],
394 [5, '5@example.com' ],
395 [6, '6@example.com' ],
396 [7, '7@example.com' ],
397 [8, '8@example.com' ],
398 [9, '9@example.com' ],
399 [10, '10@example.com' ],
400 ], 'msgmap as expected' );
# Finish the second incremental run at $mark4: message 5 drops out of the
# map, while min, max and num_highwater keep their values.
404 local $SIG{__WARN__} = sub { push @warn, @_ };
405 my %config = %$ibx_config;
406 my $ibx = PublicInbox::Inbox->new(\%config);
407 my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx);
408 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
409 # mark4 A delete of an older message
410 eval { $rw->index_sync({ref => $mark4}) };
411 is($@, '', 'no error from reindexing without msgmap');
412 is_deeply(\@warn, [], 'no warnings');
414 my ($min, $max) = $ibx->mm->minmax;
415 is($min, 1, 'min as expected');
416 is($max, 10, 'max as expected');
417 is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
418 is_deeply($ibx->mm->msg_range(\$min, $max),
420 [1, '1@example.com' ],
421 [2, '2@example.com' ],
422 [3, '3@example.com' ],
423 [6, '6@example.com' ],
424 [7, '7@example.com' ],
425 [8, '8@example.com' ],
426 [9, '9@example.com' ],
427 [10, '10@example.com' ],
428 ], 'msgmap as expected' );
# Rethread test: force every row of the `over' table onto one thread id,
# reindex with rethread => 1, and check that no row keeps a thread id at
# or below the forced one, that the lowest new id is above the highest
# old one, and that the number of rows with num > 0 is unchanged.
433 local $SIG{__WARN__} = sub { push @warn, @_ };
434 my $ibx = PublicInbox::Inbox->new({ %$ibx_config });
# Open the overview database file directly through OverIdx to get a
# handle that can modify rows.
435 my $f = $ibx->over->{dbh}->sqlite_db_filename;
436 my $over = PublicInbox::OverIdx->new($f);
437 my $dbh = $over->dbh;
# Returns the tid column of all rows with num > 0, in ascending order.
438 my $non_ghost_tids = sub {
439 $dbh->selectall_arrayref(<<'');
440 SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC
443 my $before = $non_ghost_tids->();
# The UPDATE has no WHERE clause, so every row ends up with the value
# returned by get_counter for 'thread'.
446 my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread');
447 my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid);
449 my $rw = PublicInbox::SearchIdx->new($ibx, 1);
# Progress messages are collected into @pr instead of being printed.
451 my $pr = sub { push @pr, @_ };
452 $rw->index_sync({reindex => 1, rethread => 1, -progress => $pr });
453 my @n = $dbh->selectrow_array(<<EOS, undef, $tid);
454 SELECT COUNT(*) FROM over WHERE tid <= ?
456 is_deeply(\@n, [ 0 ], 'rethread dropped old threadids');
457 my $after = $non_ghost_tids->();
458 ok($after->[0]->[0] > $before->[-1]->[0],
459 'all tids greater than before');
460 is(scalar @$after, scalar @$before, 'thread count unchanged');
# Captured warnings are the "got" value and the empty list the "expected"
# one, matching the argument order used by the other sections.
461 is_deeply(\@warn, [], 'no warnings');
462 # diag "@pr"; # XXX do we care?