}
# Remember a deleted message so a later pass can reconcile it.
# Loads blob $oid from $git, parses it as MIME, and records $oid in the
# shared $sync->{D} table once per Message-ID found in its header.
# Entries still present in {D} after the log walk are treated as
# leftovers and passed to unindex_oid() by the caller.
sub mark_deleted {
- my ($self, $D, $git, $oid) = @_;
+ my ($self, $sync, $git, $oid) = @_;
my $msgref = $git->cat_file($oid);
my $mime = PublicInbox::MIME->new($$msgref);
my $mids = mids($mime->header_obj);
my $cid = content_id($mime);
# key is "$mid\0$cid", the same key reindex_oid() deletes on a match
foreach my $mid (@$mids) {
- $D->{"$mid\0$cid"} = $oid;
+ $sync->{D}->{"$mid\0$cid"} = $oid;
}
}
sub reindex_oid {
- my ($self, $mm_tmp, $D, $git, $oid, $regen, $reindex) = @_;
+ my ($self, $sync, $git, $oid) = @_;
my $len;
my $msgref = $git->cat_file($oid, \$len);
my $mime = PublicInbox::MIME->new($$msgref);
my $cid = content_id($mime);
# get the NNTP article number we used before, highest number wins
- # and gets deleted from mm_tmp;
+ # and gets deleted from sync->{mm_tmp};
my $mid0;
my $num = -1;
my $del = 0;
foreach my $mid (@$mids) {
- $del += delete($D->{"$mid\0$cid"}) ? 1 : 0;
- my $n = $mm_tmp->num_for($mid);
+ $del += delete($sync->{D}->{"$mid\0$cid"}) ? 1 : 0;
+ my $n = $sync->{mm_tmp}->num_for($mid);
if (defined $n && $n > $num) {
$mid0 = $mid;
$num = $n;
$self->{mm}->mid_set($num, $mid0);
}
}
- if (!defined($mid0) && $regen && !$del) {
- $num = $$regen--;
+ if (!defined($mid0) && !$del) {
+ $num = $sync->{regen}--;
die "BUG: ran out of article numbers\n" if $num <= 0;
my $mm = $self->{mm};
foreach my $mid (reverse @$mids) {
if (!defined($mid0) || $del) {
if (!defined($mid0) && $del) { # expected for deletes
- $num = $$regen--;
- $self->{mm}->num_highwater($num) unless $reindex;
+ $num = $sync->{regen}--;
+ $self->{mm}->num_highwater($num) if !$sync->{reindex};
return
}
return;
}
- $mm_tmp->mid_delete($mid0) or
+ $sync->{mm_tmp}->mid_delete($mid0) or
die "failed to delete <$mid0> for article #$num\n";
$self->{over}->add_overview($mime, $len, $num, $oid, $mid0);
my $n = $self->{transact_bytes} += $len;
if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) {
$git->cleanup;
- $mm_tmp->atfork_prepare;
+ $sync->{mm_tmp}->atfork_prepare;
$self->done; # release lock
# TODO: print progress info, here
# allow -watch or -mda to write...
$self->idx_init; # reacquire lock
- $mm_tmp->atfork_parent;
+ $sync->{mm_tmp}->atfork_parent;
}
}
# returns a revision range for git-log(1)
sub log_range ($$$$$) {
- my ($self, $git, $ranges, $i, $tip) = @_;
- my $cur = $ranges->[$i] or return $tip; # all of it
+ my ($self, $sync, $git, $i, $tip) = @_;
+ my $cur = $sync->{ranges}->[$i] or return $tip; # all of it
my $range = "$cur..$tip";
if (is_ancestor($git, $cur, $tip)) { # common case
my $n = $git->qx(qw(rev-list --count), $range);
chomp($n);
if ($n == 0) {
- $ranges->[$i] = undef;
+ $sync->{ranges}->[$i] = undef;
return; # nothing to do
}
} else {
$range;
}
-sub index_prepare {
- my ($self, $opts, $epoch_max, $ranges) = @_;
+# Work out what each epoch needs indexed before the log walk starts.
+# Walks epochs from $epoch_max down to 0, resolves the tip to index up
+# to, stores the git-log(1) revision range in $sync->{ranges}->[$i] and
+# adds up the per-epoch change counts.
+#
+# Returns -1 when $opts->{reindex} is set, otherwise the number of
+# counted changes plus the current article-number high-water mark
+# (treated as 0 when there is none).
+sub sync_prepare {
+ my ($self, $sync, $opts, $epoch_max) = @_;
my $pr = $opts->{-progress};
my $regen_max = 0;
my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master';
+
+ # reindex stops at the current heads and we later rerun index_sync
+ # without {reindex}
+ # (declared separately: "my $x = ... if COND" is undefined behaviour)
+ my $reindex_heads;
+ $reindex_heads = last_commits($self, $epoch_max) if $opts->{reindex};
+
for (my $i = $epoch_max; $i >= 0; $i--) {
die 'BUG: already indexing!' if $self->{reindex_pipe};
my $git_dir = git_dir_n($self, $i);
-d $git_dir or next; # missing parts are fine
my $git = PublicInbox::Git->new($git_dir);
+ if ($reindex_heads) {
+ $head = $reindex_heads->[$i] or next;
+ }
chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head));
next if $?; # new repo
- my $range = log_range($self, $git, $ranges, $i, $tip) or next;
- $ranges->[$i] = $range;
+ my $range = log_range($self, $sync, $git, $i, $tip) or next;
+ $sync->{ranges}->[$i] = $range;
# can't use 'rev-list --count' if we use --diff-filter
$pr->("$i.git counting changes\n\t$range ... ") if $pr;
$pr->("$n\n") if $pr;
$regen_max += $n;
}
- \$regen_max;
+ # reindex should NOT see new commits anymore, if we do,
+ # it's a problem and we need to notice it via die()
+ return -1 if $opts->{reindex};
+ # parenthesized so "|| 0" guards only the high-water mark, not the sum
+ $regen_max + ($self->{mm}->num_highwater() || 0);
}
sub unindex_oid_remote {
qw(-c gc.reflogExpire=now gc --prune=all)]);
}
-sub index_ranges ($$$) {
- my ($self, $reindex, $epoch_max) = @_;
- return last_commits($self, $epoch_max) unless $reindex;
+sub sync_ranges ($$$) {
+ my ($self, $sync, $epoch_max) = @_;
+ my $reindex = $sync->{reindex};
+ return last_commits($self, $epoch_max) unless $reindex;
return [] if ref($reindex) ne 'HASH';
my $ranges = $reindex->{from}; # arrayref;
my $latest = git_dir_latest($self, \$epoch_max);
return unless defined $latest;
$self->idx_init($opts); # acquire lock
- my $mm_tmp = $self->{mm}->tmp_clone;
- my $reindex = $opts->{reindex};
- my $ranges = index_ranges($self, $reindex, $epoch_max);
-
- my $high = $self->{mm}->num_highwater();
- my $regen = $self->index_prepare($opts, $epoch_max, $ranges);
- $$regen += $high if $high;
- my $D = {}; # "$mid\0$cid" => $oid
+ my $sync = {
+ mm_tmp => $self->{mm}->tmp_clone,
+ D => {}, # "$mid\0$cid" => $oid
+ reindex => $opts->{reindex},
+ };
+ $sync->{ranges} = sync_ranges($self, $sync, $epoch_max);
+ $sync->{regen} = sync_prepare($self, $sync, $opts, $epoch_max);
+
my @cmd = qw(log --raw -r --pretty=tformat:%H
--no-notes --no-color --no-abbrev --no-renames);
my $git = PublicInbox::Git->new($git_dir);
my $unindex = delete $self->{"unindex-range.$i"};
$self->unindex($opts, $git, $unindex) if $unindex;
- defined(my $range = $ranges->[$i]) or next;
+ defined(my $range = $sync->{ranges}->[$i]) or next;
my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $range);
my $cmt;
while (<$fh>) {
if (/\A$x40$/o && !defined($cmt)) {
$cmt = $_;
} elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) {
- $self->reindex_oid($mm_tmp, $D, $git, $1,
- $regen, $reindex);
+ $self->reindex_oid($sync, $git, $1);
} elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\td$/o) {
- $self->mark_deleted($D, $git, $1);
+ $self->mark_deleted($sync, $git, $1);
}
}
$fh = undef;
# unindex is required for leftovers if "deletes" affect messages
# in a previous fetch+index window:
- if (scalar keys %$D) {
+ if (my @leftovers = values %{delete $sync->{D}}) {
my $git = $self->{-inbox}->git;
- $self->unindex_oid($git, $_) for values %$D;
+ $self->unindex_oid($git, $_) for @leftovers;
$git->cleanup;
}
$self->done;
+
+ # reindex does not pick up new changes, so we rerun w/o it:
+ if ($opts->{reindex}) {
+ my %again = %$opts;
+ $sync = undef;
+ delete @again{qw(reindex -skip_lock)};
+ index_sync($self, \%again);
+ }
}
1;