+ }
+ waitpid($pid, 0);
+ die "@$cmd failed: \$?=$?\n" if $?;
+}
+
+sub cpdb_loop ($$$;$$) {
+ my ($src, $dst, $pr_data, $cur_shard, $reshard) = @_;
+ my ($pr, $fmt, $nr, $pfx);
+ if ($pr_data) {
+ $pr = $pr_data->{pr};
+ $fmt = $pr_data->{fmt};
+ $nr = \($pr_data->{nr});
+ $pfx = $pr_data->{pfx};
+ }
+
+ my ($it, $end);
+ do {
+ eval {
+ $it = $src->postlist_begin('');
+ $end = $src->postlist_end('');
+ };
+ } while (cpdb_retryable($src, $pfx));
+
+ do {
+ eval {
+ for (; $it != $end; $it++) {
+ my $docid = $it->get_docid;
+ if (defined $reshard) {
+ my $dst_shard = $docid % $reshard;
+ next if $dst_shard != $cur_shard;
+ }
+ my $doc = $src->get_document($docid);
+ $dst->replace_document($docid, $doc);
+ if ($pr_data && !(++$$nr & 1023)) {
+ $pr->(sprintf($fmt, $$nr));
+ }
+ }
+
+ # unlike copydatabase(1), we don't copy spelling
+ # and synonym data (or other user metadata) since
+ # the Perl APIs don't expose iterators for them
+ # (and public-inbox does not use those features)
+ };
+ } while (cpdb_retryable($src, $pfx));
+}
+
+# Like copydatabase(1), this is horribly slow; and it doesn't seem due
+# to the overhead of Perl.
+sub cpdb ($$) { # cb_spawn callback
+ my ($args, $opt) = @_;
+ my ($old, $newdir) = @$args;
+ my $new = $newdir->dirname;
+ my ($src, $cur_shard);
+ my $reshard;
+ PublicInbox::SearchIdx::load_xapian_writable();
+ my $XapianDatabase = $PublicInbox::Search::X{Database};
+ if (ref($old) eq 'ARRAY') {
+ ($cur_shard) = ($new =~ m!(?:xap|ei)[0-9]+/([0-9]+)\b!);
+ defined $cur_shard or
+ die "BUG: could not extract shard # from $new";
+ $reshard = $opt->{reshard};
+ defined $reshard or die 'BUG: got array src w/o --reshard';
+
+ # resharding, M:N copy means have full read access
+ foreach (@$old) {
+ if ($src) {
+ my $sub = $XapianDatabase->new($_);
+ $src->add_database($sub);
+ } else {
+ $src = $XapianDatabase->new($_);
+ }
+ }
+ } else {
+ $src = $XapianDatabase->new($old);
+ }
+
+ my ($tmp, $ft);
+ local @SIG{keys %SIG} = values %SIG;
+ if ($opt->{compact}) {
+ my ($dir) = ($new =~ m!(.*?/)[^/]+/*\z!);
+ same_fs_or_die($dir, $new);
+ $ft = File::Temp->newdir("$new.compact-XXXX", DIR => $dir);
+ setup_signals();
+ $tmp = $ft->dirname;
+ PublicInbox::Syscall::nodatacow_dir($tmp);
+ } else {
+ $tmp = $new;
+ }
+
+ # like copydatabase(1), be sure we don't overwrite anything in case
+ # of other bugs:
+ my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
+ die if $@;
+ my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase};
+ $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{fsync};
+ my $dst = $XapianWritableDatabase->new($tmp, $flag);
+ my $pr = $opt->{-progress};
+ my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
+ my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
+
+ do {
+ eval {
+ # update the only metadata key for v1:
+ my $lc = $src->get_metadata('last_commit');
+ $dst->set_metadata('last_commit', $lc) if $lc;
+
+ # only the first xapian shard (0) gets 'indexlevel'
+ if ($new =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\b!) {
+ my $l = $src->get_metadata('indexlevel');
+ if ($l eq 'medium') {
+ $dst->set_metadata('indexlevel', $l);
+ }
+ }
+ if ($pr_data) {
+ my $tot = $src->get_doccount;
+
+ # we can only estimate when resharding,
+ # because removed spam causes slight imbalance
+ my $est = '';
+ if (defined $cur_shard && $reshard > 1) {
+ $tot = int($tot/$reshard);
+ $est = 'around ';
+ }
+ my $fmt = "$pfx % ".length($tot)."u/$tot\n";
+ $pr->("$pfx copying $est$tot documents\n");
+ $pr_data->{fmt} = $fmt;
+ $pr_data->{total} = $tot;
+ }
+ };
+ } while (cpdb_retryable($src, $pfx));
+
+ if (defined $reshard) {
+ # we rely on document IDs matching NNTP article number,
+ # so we can't have the Xapian sharding DB support rewriting
+ # document IDs. Thus we iterate through each shard
+ # individually.
+ $src = undef;
+ foreach (@$old) {
+ my $old = $XapianDatabase->new($_);
+ cpdb_loop($old, $dst, $pr_data, $cur_shard, $reshard);
+ }
+ } else {
+ cpdb_loop($src, $dst, $pr_data);
+ }
+
+ $pr->(sprintf($pr_data->{fmt}, $pr_data->{nr})) if $pr;
+ return unless $opt->{compact};
+
+ $src = $dst = undef; # flushes and closes
+
+ # this is probably the best place to do xapian-compact
+ # since $dst isn't readable by HTTP or NNTP clients, yet:
+ compact([ $tmp, $new ], $opt);
+ remove_tree($tmp) or die "failed to remove $tmp: $!\n";