public-inboxes where cross-posting is common, this allows
significant space savings on Xapian indices.
+=item --gc
+
+Perform garbage collection instead of indexing. Use this if
+inboxes are removed from the extindex, or if messages are
+purged or removed from some inboxes.
+
+=item --reindex
+
+Forces a re-index of all messages in the extindex. This can be
+used for in-place upgrades and bugfixes while read-only server
+processes are utilizing the index. Keep in mind this roughly
+doubles the size of the already-large Xapian database.
+
+The extindex locks will be released roughly every 10s to
+allow L<public-inbox-mda(1)> and L<public-inbox-watch(1)>
+processes to write to the extindex.
+
+=item --fast
+
+Only valid with C<--reindex> (an error otherwise): look only for new
+and stale entries, leaving already-indexed messages untouched.
+
=back
=head1 FILES
local $sync->{-regen_fmt} =
"$ekey checking unseen %u/".$ibx->over->max."\n";
${$sync->{nr}} = 0;
-
+ my $fast = $sync->{-opt}->{fast};
while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) {
${$sync->{nr}} = $beg;
$beg = $msgs->[-1]->{num} + 1;
# the first time around ASAP:
if (scalar(@$docids) == 0) {
reindex_unseen($self, $sync, $ibx, $xsmsg);
- } else { # already seen, reindex later
+ } elsif (!$fast) { # already seen, reindex later
for my $r (@$docids) {
$self->{oidx}->eidxq_add($r->[0]);
}
my $fetching;
my $ekey = $ibx->eidx_key;
local $sync->{-regen_fmt} =
- "$ekey check stale/missing %u/".$ibx->over->max."\n";
+ "$ekey checking stale/missing %u/".$ibx->over->max."\n";
${$sync->{nr}} = 0;
do {
if (checkpoint_due($sync)) {
--max-size=BYTES do not index messages larger than the given size
--gc perform garbage collection instead of indexing
--dedupe[=MSGID] fix prior deduplication errors (may be repeated)
+ --reindex index previously indexed inboxes
+ --fast only reindex unseen/stale messages
--verbose | -v increase verbosity (may be repeated)
--dry-run | -n dry-run on --dedupe
EOF
my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 };
GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
- fsync|sync!
+ fsync|sync! fast
indexlevel|index-level|L=s max_size|max-size=s
batch_size|batch-size=s
dedupe:s@ gc commit-interval=i watch scan! dry-run|n
} else {
@ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
}
-if ($opt->{'dry-run'} && !$opt->{dedupe}) {
+$opt->{'dry-run'} && !$opt->{dedupe} and
die "E: --dry-run only affects --dedupe\n";
-}
+$opt->{fast} && !$opt->{reindex} and
+ die "E: --fast only affects --reindex\n";
PublicInbox::Admin::require_or_die(qw(-search));
PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n";
$es->{xdb}->reopen;
$mset = $es->mset("mid:$new->{mid}");
is($mset->size, 0, 'stale mid gone Xapian');
+
+ ok(run_script([qw(-extindex --reindex --all --fast), "$home/extindex"],
+ undef, $opt), '--reindex w/ --fast');
+ ok(!run_script([qw(-extindex --all --fast), "$home/extindex"],
+ undef, $opt), '--fast alone makes no sense');
}
if ('reindex catches content bifurcation') {