Sergey Matveev's repositories - public-inbox.git/commitdiff
extindex: support --jobs/-j properly on creation for shard count
author Eric Wong <e@80x24.org>
Sun, 25 Jul 2021 12:44:23 +0000 (12:44 +0000)
committer Eric Wong <e@80x24.org>
Sun, 25 Jul 2021 21:08:03 +0000 (21:08 +0000)
The --jobs/-j switch wasn't wired up to the shard count on
creation.  That matters because Xapian appears to suffer from
I/O amplification problems as DB shards get larger:

  https://lists.xapian.org/pipermail/xapian-discuss/2019-February/009727.html
  <23640.32170.703368.841021@y.dockes.com>

Of course, we shouldn't have too many shards, either, because
performance problems with too many shards were the entire reason
extindex was created:

  https://lists.xapian.org/pipermail/xapian-discuss/2020-August/009823.html
  <20200826064728.GA32239@dcvr>

lib/PublicInbox/ExtSearchIdx.pm
t/extsearch.t

index fb1f511e8bc3a7062d68744bfa37cb75dfa42e57..22edbb4b91126079e39d5844b53b4f7f85cf5558 100644 (file)
@@ -52,7 +52,8 @@ sub new {
                parallel => 1,
                lock_path => "$dir/ei.lock",
        }, __PACKAGE__;
-       $self->{shards} = $self->count_shards || nproc_shards($opt->{creat});
+       $self->{shards} = $self->count_shards ||
+               nproc_shards({ nproc => $opt->{jobs} });
        my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3");
        $self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync};
        $self->{oidx} = $oidx;
index 46a6f2ec5eb80b1f81a911486839c42cc5b0a654..1f62e80c8c1a2e7adcbcbbc3ff71d7506f5dee2d 100644 (file)
@@ -411,4 +411,15 @@ if ('dedupe + dry-run') {
                '--dry-run alone fails');
 }
 
+for my $j (1, 3, 6) {
+       my $o = { 2 => \(my $err = '') };
+       my $d = "$home/extindex-j$j";
+       ok(run_script(['-extindex', "-j$j", '--all', $d], undef, $o),
+               "init with -j$j");
+       my $max = $j - 2;
+       $max = 0 if $max < 0;
+       my @dirs = glob("$d/ei*/?");
+       like($dirs[-1], qr!/ei[0-9]+/$max\z!, '-j works');
+}
+
 done_testing;