From: Eric Wong Date: Sat, 28 Mar 2020 00:56:04 +0000 (+0000) Subject: index: support --compact / -c on command-line X-Git-Tag: v1.4.0~46 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=b5ddcb3352ef31aeb03b4c4cbb04af3de34a4c4f index: support --compact / -c on command-line It's more convenient to specify `-c' / `--compact' on the command-line when reindexing than it is to invoke public-inbox-compact(1) separately. This is especially convenient in low-space situations when public-inbox-index is operating on multiple inboxes sequentially, as compaction can happen immediately after indexing each inbox, instead of waiting until all inboxes are indexed. --- diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 14113ec8..dede5d2e 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -4,7 +4,7 @@ public-inbox-index - create and update search indices =head1 SYNOPSIS -public-inbox-index [OPTIONS] INBOX_DIR +public-inbox-index [OPTIONS] INBOX_DIR... =head1 DESCRIPTION @@ -32,16 +32,32 @@ normal search functionality. =over +=item --compact / -c + +Compacts the Xapian DBs after indexing. This is recommended +when using C<--reindex> to avoid running out of disk space +while indexing multiple inboxes. + +While this option takes a negligible amount of time compared to +C<--reindex>, it requires temporarily duplicating the entire +contents of the Xapian DB. + +This switch may be specified twice, in which case compaction +happens both before and after indexing to minimize the temporal +footprint of the (re)indexing operation. + =item --reindex Forces a re-index of all messages in the inbox. This can be used for in-place upgrades and bugfixes while NNTP/HTTP server processes are utilizing the index. Keep in mind this roughly doubles the size of the already-large -Xapian database. Running L<public-inbox-compact(1)> -afterwards is recommended to release free space. +Xapian database. 
Using this with C<--compact> or running +L<public-inbox-compact(1)> afterwards is recommended to +release free space. -This does not touch the NNTP article number database. +This does not touch the NNTP article number database or +affect threading. =item --prune diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index e684f546..ce979ea2 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -19,6 +19,7 @@ use constant { sub new { my ($class, $ibx, $creat_opt) = @_; + return $ibx if ref($ibx) eq $class; my $self = bless $ibx, $class; # TODO: maybe stop supporting this diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 7414c9b6..8e2b9063 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -217,13 +217,15 @@ sub prepare_run { ($tmp, \@queue); } +sub check_compact () { runnable_or_die($XAPIAN_COMPACT) } + sub run { my ($ibx, $task, $opt) = @_; # task = 'cpdb' or 'compact' my $cb = \&${\"PublicInbox::Xapcmd::$task"}; PublicInbox::Admin::progress_prepare($opt ||= {}); defined(my $dir = $ibx->{inboxdir}) or die "no inboxdir defined\n"; -d $dir or die "inboxdir=$dir does not exist\n"; - runnable_or_die($XAPIAN_COMPACT) if $opt->{compact}; + check_compact() if $opt->{compact}; my $reindex; # v1:{ from => $x40 }, v2:{ from => [ $x40, $x40, .. 
] } } if (!$opt->{-coarse_lock}) { diff --git a/script/public-inbox-index b/script/public-inbox-index index c6910420..7def9964 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -11,12 +11,19 @@ use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $usage = "public-inbox-index INBOX_DIR"; use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); +use PublicInbox::Xapcmd; -my $opt = { quiet => -1 }; -GetOptions($opt, qw(verbose|v+ reindex jobs|j=i prune indexlevel|L=s)) +my $compact_opt; +my $opt = { quiet => -1, compact => 0 }; +GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune indexlevel|L=s)) or die "bad command-line args\n$usage"; die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0; +if ($opt->{compact}) { + require PublicInbox::Xapcmd; + PublicInbox::Xapcmd::check_compact(); + $compact_opt = { -coarse_lock => 1, compact => 1 }; +} my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV); PublicInbox::Admin::require_or_die('-index'); @@ -31,4 +38,11 @@ foreach my $ibx (@ibxs) { PublicInbox::Admin::require_or_die(keys %$mods); PublicInbox::Admin::progress_prepare($opt); -PublicInbox::Admin::index_inbox($_, undef, $opt) for @ibxs; +for my $ibx (@ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); + if ($opt->{compact} >= 2) { + PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt); + } + PublicInbox::Admin::index_inbox($ibx, undef, $opt); + PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt) if $compact_opt; +} diff --git a/t/convert-compact.t b/t/convert-compact.t index 1671caad..70609c7d 100644 --- a/t/convert-compact.t +++ b/t/convert-compact.t @@ -115,4 +115,17 @@ my $msgs = $ibx->recent({limit => 1000}); is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); is(scalar @$msgs, 1, 'only one message in history'); +$ibx = undef; +$err = ''; +$cmd = [ qw(-index --reindex -c), "$tmpdir/v2" ]; +ok(run_script($cmd, undef, $rdr), '--reindex -c'); +like($err, 
qr/xapian-compact/, 'xapian-compact ran (-c)'); + +$rdr->{2} = \(my $err2 = ''); +$cmd = [ qw(-index --reindex -cc), "$tmpdir/v2" ]; +ok(run_script($cmd, undef, $rdr), '--reindex -c -c'); +like($err2, qr/xapian-compact/, 'xapian-compact ran (-c -c)'); +ok(scalar(split(/\n/, $err2)) > scalar(split(/\n/, $err)), + '-compacted twice'); + done_testing();