use PublicInbox::Spawn qw(spawn);
use PublicInbox::SearchIdx;
use IO::Handle;
+use File::Temp qw(tempfile);
# an estimate of the post-packed size to the raw uncompressed size
my $PACKING_FACTOR = 0.4;
# $creat may be any true value, or 0/undef. A hashref is true,
# and $creat->{nproc} may be set to an integer
my ($class, $v2ibx, $creat) = @_;
- my $dir = $v2ibx->{inboxdir} or die "no inboxdir in inbox\n";
+ $v2ibx = PublicInbox::InboxWritable->new($v2ibx);
+ my $dir = $v2ibx->assert_usable_dir;
unless (-d $dir) {
if ($creat) {
require File::Path;
die "$dir does not exist\n";
}
}
-
- $v2ibx = PublicInbox::InboxWritable->new($v2ibx);
$v2ibx->umask_prepare;
my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION;
# XXX experimental
sub diff ($$$) {
my ($mid, $cur, $new) = @_;
- use File::Temp qw(tempfile);
my ($ah, $an) = tempfile('email-cur-XXXXXXXX', TMPDIR => 1);
print $ah $cur->as_string or die "print: $!";
}
# only for a few odd messages with multiple Message-IDs
-sub reindex_oid_m ($$$$) {
- my ($self, $sync, $git, $oid) = @_;
+sub reindex_oid_m ($$$$;$) {
+ my ($self, $sync, $git, $oid, $regen_num) = @_;
+ $self->{current_info} = "multi_mid $oid";
my ($num, $mid0, $len);
my $msgref = $git->cat_file($oid, \$len);
my $mime = PublicInbox::MIME->new($$msgref);
delete($sync->{D}->{"$mid\0$cid"}) and
die "BUG: reindex_oid should handle <$mid> delete";
}
+ my $over = $self->{over};
for my $mid (reverse @$mids) {
- ($num, $mid0) = $self->{over}->num_mid0_for_oid($oid, $mid);
- last if defined $num;
+ ($num, $mid0) = $over->num_mid0_for_oid($oid, $mid);
+ next unless defined $num;
+ if (defined($regen_num) && $regen_num != $num) {
+ die "BUG: regen(#$regen_num) != over(#$num)";
+ }
}
unless (defined($num)) {
for my $mid (reverse @$mids) {
# is this a number we got before?
- $num = $sync->{mm_tmp}->num_for($mid);
- next unless defined $num;
- $mid0 = $mid;
+ my $n = $sync->{mm_tmp}->num_for($mid);
+ next unless defined $n;
+ next if defined($regen_num) && $regen_num != $n;
+ ($num, $mid0) = ($n, $mid);
last;
}
}
if (defined($num)) {
$sync->{mm_tmp}->num_delete($num);
- } else {
- $num = $sync->{regen}--;
- if ($num <= 0) {
- # fixup bugs in old mirrors on reindex
- for my $mid (reverse @$mids) {
- $num = $self->{mm}->mid_insert($mid);
- next unless defined $num;
- $mid0 = $mid;
- last;
- }
- if (defined $mid0) {
- if ($sync->{reindex}) {
- warn "reindex added #$num <$mid0>\n";
- }
- } else {
- warn "E: cannot find article #\n";
- return;
- }
- } else { # $num > 0, use the new article number
- for my $mid (reverse @$mids) {
- $self->{mm}->mid_set($num, $mid) == 1 or next;
- $mid0 = $mid;
- last;
- }
- unless (defined $mid0) {
- warn "E: cannot regen #$num\n";
- return;
+ } elsif (defined $regen_num) {
+ $num = $regen_num;
+ for my $mid (reverse @$mids) {
+ $self->{mm}->mid_set($num, $mid) == 1 or next;
+ $mid0 = $mid;
+ last;
+ }
+ unless (defined $mid0) {
+ warn "E: cannot regen #$num\n";
+ return;
+ }
+ } else { # fixup bugs in old mirrors on reindex
+ for my $mid (reverse @$mids) {
+ $num = $self->{mm}->mid_insert($mid);
+ next unless defined $num;
+ $mid0 = $mid;
+ last;
+ }
+ if (defined $mid0) {
+ if ($sync->{reindex}) {
+ warn "reindex added #$num <$mid0>\n";
}
+ } else {
+ warn "E: cannot find article #\n";
+ return;
}
}
$sync->{nr}++;
}
}
+# reuse Msgmap to store num => oid mapping (rather than num => mid)
+sub multi_mid_q_new () {
+ my ($fh, $fn) = tempfile('multi_mid-XXXXXXX', EXLOCK => 0, TMPDIR => 1);
+ my $multi_mid = PublicInbox::Msgmap->new_file($fn, 1);
+ $multi_mid->{dbh}->do('PRAGMA synchronous = OFF');
+ # for Msgmap->DESTROY:
+ $multi_mid->{tmp_name} = $fn;
+ $multi_mid->{pid} = $$;
+ close $fh or die "failed to close $fn: $!";
+ $multi_mid
+}
+
+sub multi_mid_q_push ($$) {
+ my ($sync, $oid) = @_;
+ my $multi_mid = $sync->{multi_mid} //= multi_mid_q_new();
+ if ($sync->{reindex}) { # no regen on reindex
+ $multi_mid->mid_insert($oid);
+ } else {
+ my $num = $sync->{regen}--;
+ die "BUG: ran out of article numbers" if $num <= 0;
+ $multi_mid->mid_set($num, $oid);
+ }
+}
+
sub reindex_oid ($$$$) {
my ($self, $sync, $git, $oid) = @_;
my ($num, $mid0, $len);
check_unindexed($self, $num, $mid0);
} else {
$num = $sync->{regen}--;
- die "BUG: ran out of article numbers\n" if $num <= 0;
+ die "BUG: ran out of article numbers" if $num <= 0;
if ($self->{mm}->mid_set($num, $mid) != 1) {
warn "E: unable to assign $num => <$mid>\n";
return;
# do not delete from {mm_tmp}, since another
# single-MID message may use it.
} else { # handle them at the end:
- push @{$sync->{multi_mid} //= []}, $oid;
+ multi_mid_q_push($sync, $oid);
}
return;
}
--no-notes --no-color --no-renames
--diff-filter=AM), $range, '--', 'm');
++$n while <$fh>;
+ close $fh or die "git log failed: \$?=$?";
$pr->("$n\n") if $pr;
$regen_max += $n;
}
unindex_oid($self, $git, $1, $unindexed);
}
delete $self->{reindex_pipe};
- $fh = undef;
+ close $fh or die "git log failed: \$?=$?";
return unless $sync->{-opt}->{prune};
my $after = scalar keys %$unindexed;
# ensure any blob can not longer be accessed via dumb HTTP
PublicInbox::Import::run_die(['git', "--git-dir=$git->{git_dir}",
- qw(-c gc.reflogExpire=now gc --prune=all)]);
+ qw(-c gc.reflogExpire=now gc --prune=all --quiet)]);
}
sub sync_ranges ($$$) {
mark_deleted($self, $sync, $git, $1);
}
}
- $fh = undef;
+ close $fh or die "git log failed: \$?=$?";
delete $self->{reindex_pipe};
update_last_commit($self, $git, $i, $cmt) if defined $cmt;
}
}
if (my $multi_mid = delete $sync->{multi_mid}) {
$git //= $self->{-inbox}->git;
-
- while (defined(my $oid = pop(@$multi_mid))) {
- $self->{current_info} = "multi_mid $oid";
- reindex_oid_m($self, $sync, $git, $oid);
+ my ($min, $max) = $multi_mid->minmax;
+ if ($sync->{reindex}) {
+ # we may need to create new Message-IDs if mirrors
+ # were initially indexed with old versions
+ for (my $i = $max; $i >= $min; $i--) {
+ my $oid = $multi_mid->mid_for($i);
+ next unless defined $oid;
+ reindex_oid_m($self, $sync, $git, $oid);
+ }
+ } else { # regen on initial index
+ for my $num ($min..$max) {
+ my $oid = $multi_mid->mid_for($num);
+ next unless defined $oid;
+ reindex_oid_m($self, $sync, $git, $oid, $num);
+ }
}
- $git->cleanup if $git;
}
+ $git->cleanup if $git;
$self->done;
if (my $nr = $sync->{nr}) {