Sergey Matveev's repositories - public-inbox.git/commitdiff
index: max out XAPIAN_FLUSH_THRESHOLD if using --batch-size
author: Eric Wong <e@yhbt.net>
Fri, 7 Aug 2020 10:52:16 +0000 (10:52 +0000)
committer: Eric Wong <e@yhbt.net>
Fri, 7 Aug 2020 23:46:13 +0000 (23:46 +0000)
If XAPIAN_FLUSH_THRESHOLD is unset, Xapian will default to
10000.  That limits the effectiveness of users specifying
extremely large values of --batch-size.

While we're at it, localize the changes to globals since -index
may be eval-ed in tests (and perhaps production code in the
future).

script/public-inbox-index

index 56df5bfec6faed0a9338c88cecf5230607dcf3a9..e2bca16e92510221a0f95998277be6351dcac6f8 100755 (executable)
@@ -42,11 +42,16 @@ if (defined $max_size) {
                die "`publicInbox.indexMaxSize=$max_size' not parsed\n";
 }
 
-if (my $bs = $opt->{batchsize} // $cfg->{lc('publicInbox.indexBatchSize')}) {
+my $bs = $opt->{batchsize} // $cfg->{lc('publicInbox.indexBatchSize')};
+if (defined $bs) {
        PublicInbox::Admin::parse_unsigned(\$bs) or
                die "`publicInbox.indexBatchSize=$bs' not parsed\n";
-       $PublicInbox::SearchIdx::BATCH_BYTES = $bs;
 }
+local $PublicInbox::SearchIdx::BATCH_BYTES = $bs if defined($bs);
+
+# out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit
+# https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html
+local $ENV{XAPIAN_FLUSH_THRESHOLD} ||= '4294967295' if defined($bs);
 
 my $s = $opt->{sequentialshard} //
                        $cfg->{lc('publicInbox.indexSequentialShard')};