L<public-inbox-compact(1)> afterwards is recommended to
release free space.
+public-inbox protects writes to various indices with L<flock(2)>,
+so it is safe to reindex while L<public-inbox-watch(1)>,
+L<public-inbox-mda(1)> or L<public-inbox-learn(1)> run.
+
This does not touch the NNTP article number database or
affect threading.
per-invocation basis. See L</publicinbox.indexMaxSize>
below.
+=item --batch-size SIZE
+
+Sets or overrides L</publicinbox.indexBatchSize> on a
+per-invocation basis. See L</publicinbox.indexBatchSize>
+below.
+
=back
=head1 FILES
Default: none
+=item publicinbox.indexBatchSize
+
+Flushes changes to the filesystem and releases locks after
+indexing the given number of bytes. The default value of C<1m>
+(one megabyte) is low to minimize memory use and reduce
+contention with parallel invocations of L<public-inbox-mda(1)>,
+L<public-inbox-learn(1)>, and L<public-inbox-watch(1)>.
+
+Increase this value on powerful systems to improve throughput at
+the expense of memory use. Locks are held longer between flushes,
+but the added contention may not be noticeable on fast systems.
+
+This option is available in public-inbox 1.6 or later.
+public-inbox 1.5 and earlier used the current default, C<1m>.
+
+For L<public-inbox-v2-format(5)> inboxes, this value is
+multiplied by the number of Xapian shards. Thus a typical v2
+inbox with 3 shards will flush every 3 megabytes by default.
+
+Default: 1m (one megabyte)
+
=back
=head1 ENVIRONMENT
disk. This environment variable is handled directly by Xapian;
refer to the Xapian API documentation for more details.
-Default: our indexing code flushes every megabyte of mail seen
-to keep memory usage low. Setting this environment variable to
-any positive value will switch to a document count-based
-threshold in Xapian.
+For public-inbox 1.6 and later, use C<publicinbox.indexBatchSize>
+instead. Setting C<XAPIAN_FLUSH_THRESHOLD> during a large
+C<--reindex> may cause L<public-inbox-mda(1)>,
+L<public-inbox-learn(1)> and L<public-inbox-watch(1)> tasks to
+wait for long periods of time.
+
+Default: none, uses C<publicinbox.indexBatchSize>
=back
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
my $X = \%PublicInbox::Search::X;
my ($DB_CREATE_OR_OPEN, $DB_OPEN);
-use constant {
- BATCH_BYTES => defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
- 0x7fffffff : 1_000_000,
- DEBUG => !!$ENV{DEBUG},
-};
+our $BATCH_BYTES = defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
+ 0x7fffffff : 1_000_000;
+use constant DEBUG => !!$ENV{DEBUG};
my $xapianlevels = qr/\A(?:full|medium)\z/;
my ($max, $bytes, $batch_cb, $latest, $nr) = @_;
$$max -= $bytes;
if ($$max <= 0) {
- $$max = BATCH_BYTES;
+ $$max = $BATCH_BYTES;
$batch_cb->($nr, $latest);
}
}
my $git = $self->{git};
my $latest;
my $bytes;
- my $max = BATCH_BYTES;
+ my $max = $BATCH_BYTES;
local $/ = "\n";
my %D;
my $line;
my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
$idx->index_raw($msgref, $mime, $smsg);
my $n = $self->{transact_bytes} += $smsg->{bytes};
- $n >= (PublicInbox::SearchIdx::BATCH_BYTES * $self->{shards});
+ $n >= ($PublicInbox::SearchIdx::BATCH_BYTES * $self->{shards});
}
sub _add {
my $compact_opt;
my $opt = { quiet => -1, compact => 0, maxsize => undef };
GetOptions($opt, qw(verbose|v+ reindex compact|c+ jobs|j=i prune
- indexlevel|L=s maxsize|max-size=s))
+ indexlevel|L=s maxsize|max-size=s batchsize|batch-size=s))
or die "bad command-line args\n$usage";
die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0;
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, undef, $cfg);
PublicInbox::Admin::require_or_die('-index');
unless (@ibxs) { print STDERR "Usage: $usage\n"; exit 1 }
-my $mods = {};
+
my $max_size = $opt->{maxsize} // $cfg->{lc('publicInbox.indexMaxSize')};
if (defined $max_size) {
PublicInbox::Admin::parse_unsigned(\$max_size) or
die "`publicInbox.indexMaxSize=$max_size' not parsed\n";
}
+if (my $bs = $opt->{batchsize} // $cfg->{lc('publicInbox.indexBatchSize')}) {
+ PublicInbox::Admin::parse_unsigned(\$bs) or
+ die "`publicInbox.indexBatchSize=$bs' not parsed\n";
+ $PublicInbox::SearchIdx::BATCH_BYTES = $bs;
+}
+
+my $mods = {};
foreach my $ibx (@ibxs) {
# XXX: users can shoot themselves in the foot, with opt->{indexlevel}
$ibx->{indexlevel} //= $opt->{indexlevel} //