X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=script%2Fpublic-inbox-extindex;h=bee824b1878611691344af7f725063a47fd6b421;hb=4eee5af6011cc8cdefb66c9729952c7eff5c0b0b;hp=771486c425b315bae70fd8a82d62ab0331e0a827;hpb=f1921e46e16cabb6f705236581564b8fe901a76e;p=public-inbox.git diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 771486c4..bee824b1 100755 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; @@ -17,18 +17,22 @@ usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...] --batch-size=BYTES flush changes to OS after a given number of bytes --max-size=BYTES do not index messages larger than the given size --gc perform garbage collection instead of indexing + --dedupe[=MSGID] fix prior deduplication errors (may be repeated) + --reindex index previously indexed inboxes + --fast only reindex unseen/stale messages --verbose | -v increase verbosity (may be repeated) + --dry-run | -n dry-run on --dedupe BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-extindex(1) man page for full documentation. EOF my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i - fsync|sync! + fsync|sync! fast dangerous indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - gc commit-interval=i watch scan! - all help|h)) + dedupe:s@ gc commit-interval=i watch scan! dry-run|n + all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; @@ -38,6 +42,7 @@ STDERR->autoflush(1); local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync # require lazily to speed up --help require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; my $eidx_dir = shift(@ARGV); unless (defined $eidx_dir) { @@ -50,11 +55,17 @@ unless (defined $eidx_dir) { my @ibxs; if ($opt->{gc}) { die "E: inbox paths must not be specified with --gc\n" if @ARGV; - die "E: --all not compatible with --gc\n" if $opt->{all}; - die "E: --watch is not compatible with --gc\n" if $opt->{watch}; + for my $sw (qw(all watch dry-run dedupe)) { + die "E: --$sw is not compatible with --gc\n" if $opt->{$sw}; + } } else { @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); } +$opt->{'dry-run'} && !$opt->{dedupe} and + die "E: --dry-run only affects --dedupe\n"; +$opt->{fast} && !$opt->{reindex} and + die "E: --fast only affects --reindex\n"; + PublicInbox::Admin::require_or_die(qw(-search)); PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n"; PublicInbox::Admin::progress_prepare($opt); @@ -69,7 +80,7 @@ if ($opt->{gc}) { if ($opt->{all}) { $eidx->attach_config($cfg); } else { - $eidx->attach_inbox($_) for @ibxs; + $eidx->attach_config($cfg, \@ibxs); } if ($opt->{watch}) { $cfg = undef; # save memory only after SIGHUP