will be 1 less than the JOBS value, since there is a single
process which distributes work to the Xapian shards.
+=item -L LEVEL, --index-level=LEVEL
+
+=item -c, --compact
+
+=item -v, --verbose
+
+=item --no-fsync
+
+=item --sequential-shard
+
+=item --batch-size=BYTES
+
+=item --max-size=BYTES
+
+These options affect indexing. They have no effect if
+L</--no-index> is specified.
+
+See L<public-inbox-index(1)> for a description of these options.
+
=back
=head1 ENVIRONMENT
1;
}
+# Validate and normalize the indexing options used by both the -index
+# and -convert commands.
+#
+# $opt: option hashref; a value already present in $opt takes precedence
+#       over the matching publicInbox.index* key in $cfg.  On return,
+#       max_size, batch_size and sequential_shard hold validated values
+#       (when given) and compact_opt is set if compaction was requested.
+# $cfg: config object, read as a hash with lower-cased keys and used
+#       for ->git_bool.
+#
+# Dies when a size cannot be parsed or is not positive, or when
+# sequential_shard is not a boolean.
+# Returns a hashref of environment overrides when a batch size is in
+# effect, undef otherwise.
+sub index_prepare ($$) {
+	my ($opt, $cfg) = @_;
+	if ($opt->{compact}) {
+		require PublicInbox::Xapcmd;
+		PublicInbox::Xapcmd::check_compact();
+		my $copt = { -coarse_lock => 1, compact => 1 };
+		my $jobs = $opt->{jobs};
+		$copt->{jobs} = $jobs if defined $jobs;
+		$opt->{compact_opt} = $copt;
+	}
+
+	# option name => git config key, e.g. max_size => publicInbox.indexMaxSize
+	my $git_key_for = sub {
+		my $key = "publicInbox.index".ucfirst($_[0]);
+		$key =~ s/_([a-z])/\U$1/g;
+		$key;
+	};
+
+	foreach my $name (qw(max_size batch_size)) {
+		my $git_key = $git_key_for->($name);
+		my $v = $opt->{$name} // $cfg->{lc($git_key)};
+		next unless defined $v;
+		parse_unsigned(\$v) or die "`$git_key=$v' not parsed\n";
+		die "`$git_key=$v' must be positive\n" unless $v > 0;
+		$opt->{$name} = $v;
+	}
+
+	# out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit
+	# https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html
+	my $env = $opt->{batch_size} ?
+		{ XAPIAN_FLUSH_THRESHOLD => '4294967295' } : undef;
+
+	my $git_key = $git_key_for->('sequential_shard');
+	my $s = $opt->{sequential_shard} // $cfg->{lc($git_key)};
+	if (defined $s) {
+		my $v = $cfg->git_bool($s);
+		defined($v) or die "`$git_key=$s' not boolean\n";
+		$opt->{sequential_shard} = $v;
+	}
+	$env;
+}
+
1;
use PublicInbox::Spawn qw(spawn);
use Cwd 'abs_path';
use File::Copy 'cp'; # preserves permissions:
-my $usage = "Usage: public-inbox-convert OLD NEW\n";
-my $jobs;
-my $index = 1;
-my %opts = (
- '--jobs|j=i' => \$jobs,
- '--index!' => \$index,
-);
-GetOptions(%opts) or die "bad command-line args\n$usage";
+# $usage already starts with "Usage: " (it is also used on its own in
+# error messages), so $help interpolates it without adding another prefix.
+my $usage = 'Usage: public-inbox-convert [options] OLD NEW';
+my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
+$usage
+
+ convert v1 format inboxes to v2
+
+options:
+
+ --no-index do not index after conversion
+ --jobs=NUM set shards (NUM=0)
+ --verbose | -v increase verbosity (may be repeated)
+ --help | -? show this help
+
+index options (see public-inbox-index(1) manpage for full description):
+
+ --no-fsync speed up indexing, risk corruption on power outage
+ -L LEVEL `basic', `medium', or `full' (default: full)
+ --compact | -c run public-inbox-compact(1) after indexing
+ --sequential-shard index Xapian shards sequentially for slow storage
+ --batch-size=BYTES flush changes to OS after a given number of bytes
+ --max-size=BYTES do not index messages larger than the given size
+
+See public-inbox-convert(1) man page for full documentation.
+EOF
+
+# Option defaults.  Keys must use the primary names given to GetOptions
+# below (`max_size', not `maxsize'), since those are the names it stores
+# parsed values under.
+my $opt = {
+ index => 1,
+ # index defaults:
+ quiet => -1, compact => 0, max_size => undef, fsync => 1,
+ reindex => 1, # we always reindex
+};
+GetOptions($opt, qw(jobs|j=i index! help|?),
+ # index options
+ qw(verbose|v+ rethread compact|c+ fsync|sync!
+ indexlevel|index-level|L=s max_size|max-size=s
+ batch_size|batch-size=s
+ sequential_shard|sequential-shard|seq-shard
+ )) or die <<EOF;
+bad command-line args\n$usage
+EOF
+if ($opt->{help}) { print $help; exit 0 };
-my $old_dir = shift(@ARGV) or die $usage;
-my $new_dir = shift(@ARGV) or die $usage;
+# $usage no longer ends with a newline; append one so die() does not
+# tack " at SCRIPT line N." onto the usage message
+my $old_dir = shift(@ARGV) or die "$usage\n";
+my $new_dir = shift(@ARGV) or die "$usage\n";
die "$new_dir exists\n" if -d $new_dir;
die "$old_dir not a directory\n" unless -d $old_dir;
-my $config = PublicInbox::Config->new;
+my $cfg = PublicInbox::Config->new;
$old_dir = abs_path($old_dir);
my $old;
-if ($config) {
- $config->each_inbox(sub {
- $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir;
- });
-}
+$cfg->each_inbox(sub {
+ $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir;
+});
unless ($old) {
warn "W: $old_dir not configured in " .
PublicInbox::Config::default_file() . "\n";
}
$old->{indexlevel} //= PublicInbox::Admin::detect_indexlevel($old);
-if ($index) {
+my $env;
+if ($opt->{'index'}) {
my $mods = {};
PublicInbox::Admin::scan_ibx_modules($mods, $old);
PublicInbox::Admin::require_or_die(keys %$mods);
+ PublicInbox::Admin::progress_prepare($opt);
+ $env = PublicInbox::Admin::index_prepare($opt, $cfg);
}
-
+local %ENV = (%$env, %ENV) if $env;
my $new = { %$old };
$new->{inboxdir} = abs_path($new_dir);
$new->{version} = 2;
-$new = PublicInbox::InboxWritable->new($new);
+$new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} });
+$new->{-no_fsync} = 1 if !$opt->{fsync};
my $v2w;
$old->umask_prepare;
local $ENV{GIT_CONFIG} = $old_cfg;
my $new_cfg = "$new->{inboxdir}/all.git/config";
$v2w = PublicInbox::V2Writable->new($new, 1);
- $v2w->init_inbox($jobs);
+ $v2w->init_inbox(delete $opt->{jobs});
unlink $new_cfg;
link_or_copy($old_cfg, $new_cfg);
if (my $alt = $new->{altid}) {
my $state = '';
my $head = $old->{ref_head} || 'HEAD';
my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head);
-$v2w->idx_init;
+$v2w->idx_init($opt);
my $im = $v2w->importer;
my ($r, $w) = $im->gfi_start;
my $h = '[0-9a-f]';
# we want to trigger a reindex, not a from scratch index if
# we're reusing the msgmap from an existing v1 installation.
- $v2w->idx_init;
+ $v2w->idx_init($opt);
my $epoch0 = PublicInbox::Git->new($v2w->git_init(0));
chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head));
$v2w->last_epoch_commit(0, $cmt);
}
-$v2w->index_sync({reindex => 1}) if $index;
+$v2w->index_sync($opt) if delete $opt->{'index'};
$v2w->done;
BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
See public-inbox-index(1) man page for full documentation.
EOF
-my $compact_opt;
my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 };
GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
fsync|sync! xapian_only|xapian-only
require PublicInbox::Admin;
PublicInbox::Admin::require_or_die('-index');
-if ($opt->{compact}) {
- require PublicInbox::Xapcmd;
- PublicInbox::Xapcmd::check_compact();
- $compact_opt = { -coarse_lock => 1, compact => 1 };
- if (defined(my $jobs = $opt->{jobs})) {
- $compact_opt->{jobs} = $jobs;
- }
-}
-
my $cfg = PublicInbox::Config->new; # Config is loaded by Admin
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, undef, $cfg);
PublicInbox::Admin::require_or_die('-index');
unless (@ibxs) { print STDERR "Usage: $usage\n"; exit 1 }
-my $max_size = $opt->{max_size} // $cfg->{lc('publicInbox.indexMaxSize')};
-if (defined $max_size) {
- PublicInbox::Admin::parse_unsigned(\$max_size) or
- die "`publicInbox.indexMaxSize=$max_size' not parsed\n";
- $opt->{max_size} = $max_size;
-}
-
-my $bs = $opt->{batch_size} // $cfg->{lc('publicInbox.indexBatchSize')};
-if (defined $bs) {
- PublicInbox::Admin::parse_unsigned(\$bs) or
- die "`publicInbox.indexBatchSize=$bs' not parsed\n";
- $opt->{batch_size} = $bs;
-}
-
-# out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit
-# https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html
-local $ENV{XAPIAN_FLUSH_THRESHOLD} ||= '4294967295' if defined($bs);
-
-my $s = $opt->{sequential_shard} //
- $cfg->{lc('publicInbox.indexSequentialShard')};
-if (defined $s) {
- my $v = $cfg->git_bool($s);
- defined($v) or
- die "`publicInbox.indexSequentialShard=$s' not boolean\n";
- $opt->{sequential_shard} = $v;
-}
-
my $mods = {};
foreach my $ibx (@ibxs) {
# XXX: users can shoot themselves in the foot, with opt->{indexlevel}
}
PublicInbox::Admin::require_or_die(keys %$mods);
+my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
+# %ENV is listed last so an XAPIAN_FLUSH_THRESHOLD already set by the user
+# keeps precedence over the default returned by index_prepare
+local %ENV = (%$env, %ENV) if $env;
require PublicInbox::InboxWritable;
PublicInbox::Admin::progress_prepare($opt);
for my $ibx (@ibxs) {
$ibx = PublicInbox::InboxWritable->new($ibx);
if ($opt->{compact} >= 2) {
- PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt);
+ PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt});
}
$ibx->{-no_fsync} = 1 if !$opt->{fsync};
$ibx_opt = { %$opt, sequential_shard => $v };
}
PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
- if ($compact_opt) {
- local $compact_opt->{jobs} = 0 if $ibx_opt->{sequential_shard};
- PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt);
+ if (my $copt = $opt->{compact_opt}) {
+ local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
+ PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
}
}