+# Like copydatabase(1), this is horribly slow, and the slowness does not
+# seem to be due to the overhead of Perl.
+sub cpdb ($$) {
+ my ($args, $opt) = @_;
+ my ($old, $newdir) = @$args;
+ my $new = $newdir->dirname;
+ my ($src, $cur_shard);
+ my $reshard;
+ PublicInbox::SearchIdx::load_xapian_writable() or die;
+ my $XapianDatabase = $PublicInbox::Search::X{Database};
+ if (ref($old) eq 'ARRAY') {
+ ($cur_shard) = ($new =~ m!xap[0-9]+/([0-9]+)\b!);
+ defined $cur_shard or
+ die "BUG: could not extract shard # from $new";
+ $reshard = $opt->{reshard};
+ defined $reshard or die 'BUG: got array src w/o --reshard';
+
+ # resharding, M:N copy means have full read access
+ foreach (@$old) {
+ if ($src) {
+ my $sub = $XapianDatabase->new($_);
+ $src->add_database($sub);
+ } else {
+ $src = $XapianDatabase->new($_);
+ }
+ }
+ } else {
+ $src = $XapianDatabase->new($old);
+ }
+
+ my ($tmp, $ft);
+ local %SIG = %SIG;
+ if ($opt->{compact}) {
+ my $dir = dirname($new);
+ same_fs_or_die($dir, $new);
+ $ft = File::Temp->newdir("$new.compact-XXXXXX", DIR => $dir);
+ setup_signals();
+ $tmp = $ft->dirname;
+ } else {
+ $tmp = $new;
+ }
+
+ # like copydatabase(1), be sure we don't overwrite anything in case
+ # of other bugs:
+ my $creat = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
+ die if $@;
+ my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase};
+ my $dst = $XapianWritableDatabase->new($tmp, $creat);
+ my $pr = $opt->{-progress};
+ my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
+ my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
+
+ do {
+ eval {
+ # update the only metadata key for v1:
+ my $lc = $src->get_metadata('last_commit');
+ $dst->set_metadata('last_commit', $lc) if $lc;
+
+ # only the first xapian shard (0) gets 'indexlevel'
+ if ($new =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\b!) {
+ my $l = $src->get_metadata('indexlevel');
+ if ($l eq 'medium') {
+ $dst->set_metadata('indexlevel', $l);
+ }
+ }
+ if ($pr_data) {
+ my $tot = $src->get_doccount;
+
+ # we can only estimate when resharding,
+ # because removed spam causes slight imbalance
+ my $est = '';
+ if (defined $cur_shard && $reshard > 1) {
+ $tot = int($tot/$reshard);
+ $est = 'around ';
+ }
+ my $fmt = "$pfx % ".length($tot)."u/$tot\n";
+ $pr->("$pfx copying $est$tot documents\n");
+ $pr_data->{fmt} = $fmt;
+ $pr_data->{total} = $tot;
+ }
+ };
+ } while (cpdb_retryable($src, $pfx));
+
+ if (defined $reshard) {
+ # we rely on document IDs matching NNTP article number,
+ # so we can't have the Xapian sharding DB support rewriting
+ # document IDs. Thus we iterate through each shard
+ # individually.
+ $src = undef;
+ foreach (@$old) {
+ my $old = $XapianDatabase->new($_);
+ cpdb_loop($old, $dst, $pr_data, $cur_shard, $reshard);
+ }
+ } else {
+ cpdb_loop($src, $dst, $pr_data);
+ }
+
+ $pr->(sprintf($pr_data->{fmt}, $pr_data->{nr})) if $pr;