my $idx = $self->idx_part($part);
$idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime);
my $n = $self->{transact_bytes} += $len;
+ $sync->{nr}++;
if ($n > (PublicInbox::SearchIdx::BATCH_BYTES * $nparts)) {
$git->cleanup;
$sync->{mm_tmp}->atfork_prepare;
$self->done; # release lock
- # TODO: print progress info, here
+ if (my $pr = $sync->{-opt}->{-progress}) {
+ my ($bn) = (split('/', $git->{git_dir}))[-1];
+ $pr->("$bn ".sprintf($sync->{-regen_fmt}, $sync->{nr}));
+ }
# allow -watch or -mda to write...
$self->idx_init; # reacquire lock
# returns a revision range for git-log(1)
sub log_range ($$$$$) {
my ($self, $sync, $git, $i, $tip) = @_;
- my $cur = $sync->{ranges}->[$i] or return $tip; # all of it
+ my $opt = $sync->{-opt};
+ # Range diagnostics are only emitted when verbose > 1.  A ternary is
+ # used instead of "my ... if COND" because perlsyn documents a `my'
+ # guarded by a statement modifier as undefined behavior.
+ my $pr = (($opt->{verbose} || 0) > 1) ? $opt->{-progress} : undef;
+ my $cur = $sync->{ranges}->[$i] or do {
+ # nothing recorded in {ranges} for $i.git: caller gets the whole tip
+ $pr->("$i.git indexing all of $tip\n") if $pr;
+ return $tip; # all of it
+ };
+
my $range = "$cur..$tip";
+ $pr->("$i.git checking contiguity... ") if $pr;
if (is_ancestor($git, $cur, $tip)) { # common case
+ $pr->("OK\n") if $pr;
my $n = $git->qx(qw(rev-list --count), $range);
chomp($n);
if ($n == 0) {
$sync->{ranges}->[$i] = undef;
+ $pr->("$i.git has nothing new\n") if $pr;
return; # nothing to do
}
+ $pr->("$i.git has $n changes since $cur\n") if $pr;
} else {
+ $pr->("FAIL\n") if $pr;
warn <<"";
discontiguous range: $range
Rewritten history? (in $git->{git_dir})
$range;
}
-sub sync_prepare ($$$$) {
- my ($self, $sync, $opts, $epoch_max) = @_;
- my $pr = $opts->{-progress};
+sub sync_prepare ($$$) {
+ my ($self, $sync, $epoch_max) = @_;
+ my $pr = $sync->{-opt}->{-progress};
my $regen_max = 0;
my $head = $self->{-inbox}->{ref_head} || 'refs/heads/master';
# reindex stops at the current heads and we later rerun index_sync
# without {reindex}
- my $reindex_heads = last_commits($self, $epoch_max) if $opts->{reindex};
+ # ternary instead of "my ... if COND" (undefined behavior per perlsyn);
+ # $reindex_heads is plainly undef when not reindexing
+ my $reindex_heads = $sync->{reindex} ?
+ last_commits($self, $epoch_max) : undef;
for (my $i = $epoch_max; $i >= 0; $i--) {
die 'BUG: already indexing!' if $self->{reindex_pipe};
$sync->{ranges}->[$i] = $range;
# can't use 'rev-list --count' if we use --diff-filter
- $pr->("$i.git counting changes\n\t$range ... ") if $pr;
+ $pr->("$i.git counting $range ... ") if $pr;
my $n = 0;
my $fh = $git->popen(qw(log --pretty=tformat:%H
--no-notes --no-color --no-renames
}
# reindex should NOT see new commits anymore, if we do,
# it's a problem and we need to notice it via die()
- return -1 if $opts->{reindex};
+ my $pad = length($regen_max) + 1;
+ $sync->{-regen_fmt} = "% ${pad}u/$regen_max\n";
+ $sync->{nr} = 0;
+ return -1 if $sync->{reindex};
- $regen_max + $self->{mm}->num_highwater() || 0;
+ # `+' binds tighter than `||': the unparenthesized form only defaulted
+ # the already-computed sum, so the `|| 0' never guarded num_highwater()
+ $regen_max + ($self->{mm}->num_highwater() || 0);
}
my $x40 = qr/[a-f0-9]{40}/;
sub unindex ($$$$) {
- my ($self, $opts, $git, $unindex_range) = @_;
+ my ($self, $sync, $git, $unindex_range) = @_;
my $un = $self->{unindexed} ||= {}; # num => removal count
my $before = scalar keys %$un;
my @cmd = qw(log --raw -r
delete $self->{reindex_pipe};
$fh = undef;
- return unless $opts->{prune};
+ return unless $sync->{-opt}->{prune};
my $after = scalar keys %$un;
return if $before == $after;
# public, called by public-inbox-index
sub index_sync {
- my ($self, $opts) = @_;
- $opts ||= {};
+ my ($self, $opt) = @_;
+ $opt ||= {};
+ my $pr = $opt->{-progress};
my $epoch_max;
my $latest = git_dir_latest($self, \$epoch_max);
return unless defined $latest;
- $self->idx_init($opts); # acquire lock
+ $self->idx_init($opt); # acquire lock
my $sync = {
mm_tmp => $self->{mm}->tmp_clone,
D => {}, # "$mid\0$cid" => $oid
- reindex => $opts->{reindex},
+ reindex => $opt->{reindex},
+ -opt => $opt
};
$sync->{ranges} = sync_ranges($self, $sync, $epoch_max);
- $sync->{regen} = sync_prepare($self, $sync, $opts, $epoch_max);
+ $sync->{regen} = sync_prepare($self, $sync, $epoch_max);
my @cmd = qw(log --raw -r --pretty=tformat:%H
--no-notes --no-color --no-abbrev --no-renames);
fill_alternates($self, $i);
my $git = PublicInbox::Git->new($git_dir);
my $unindex_range = delete $sync->{"unindex-range.$i"};
- unindex($self, $opts, $git, $unindex_range) if $unindex_range;
+ unindex($self, $sync, $git, $unindex_range) if $unindex_range;
defined(my $range = $sync->{ranges}->[$i]) or next;
+ $pr->("$i.git indexing $range\n") if $pr;
my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $range);
my $cmt;
while (<$fh>) {
$git->cleanup;
}
$self->done;
+ # final tally across all epochs; reuses the $pr taken from $opt at the
+ # top of this sub instead of re-fetching it into a shadowing lexical
+ $pr->('all.git '.sprintf($sync->{-regen_fmt}, $sync->{nr})) if $pr;
# reindex does not pick up new changes, so we rerun w/o it:
- if ($opts->{reindex}) {
- my %again = %$opts;
+ if ($opt->{reindex}) {
+ my %again = %$opt;
$sync = undef;
delete @again{qw(reindex -skip_lock)};
index_sync($self, \%again);