From 9030233f83bc2e30f9529ea484da123ea9d98119 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 7 Aug 2020 10:52:16 +0000
Subject: [PATCH] index: max out XAPIAN_FLUSH_THRESHOLD if using --batch-size

If XAPIAN_FLUSH_THRESHOLD is unset, Xapian will default to 10000.
That limits the effectiveness of users specifying extremely large
values of --batch-size.

While we're at it, localize the changes to globals since -index
may be eval-ed in tests (and perhaps production code in the
future).
---
 script/public-inbox-index | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/script/public-inbox-index b/script/public-inbox-index
index 56df5bfe..e2bca16e 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -42,11 +42,19 @@ if (defined $max_size) {
 		die "`publicInbox.indexMaxSize=$max_size' not parsed\n";
 }
 
-if (my $bs = $opt->{batchsize} // $cfg->{lc('publicInbox.indexBatchSize')}) {
+my $bs = $opt->{batchsize} // $cfg->{lc('publicInbox.indexBatchSize')};
+if (defined $bs) {
 	PublicInbox::Admin::parse_unsigned(\$bs) or
 		die "`publicInbox.indexBatchSize=$bs' not parsed\n";
-	$PublicInbox::SearchIdx::BATCH_BYTES = $bs;
 }
+local $PublicInbox::SearchIdx::BATCH_BYTES = $bs if defined($bs);
+
+# out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit
+# https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html
+# `local' resets the element before `||=' could test it, so read the
+# caller's value explicitly to avoid clobbering a user-supplied setting
+local $ENV{XAPIAN_FLUSH_THRESHOLD} =
+	$ENV{XAPIAN_FLUSH_THRESHOLD} || '4294967295' if defined($bs);
 
 my $s = $opt->{sequentialshard} //
 		$cfg->{lc('publicInbox.indexSequentialShard')};
-- 
2.44.0