Creating mm_tmp is an expensive operation with large inboxes
and can be avoided if there are no new messages to process.
Since git-fetch(1) currently lacks an --exit-code option (*),
mirrors run `public-inbox-index' unconditionally after every
fetch, which is an expensive operation if it needs to duplicate
a large SQLite DB even though nothing new was fetched.
This speeds up the mirror case of:
	git --git-dir=git/$EPOCH.git fetch && public-inbox-index
This reduces the run time of a no-op `public-inbox-index' from
over 8s to ~0.5s on a (currently) 7-epoch clone of
https://lore.kernel.org/lkml/ on my system.
(*) WIP --exit-code for git-fetch:
https://public-inbox.org/git/87ftphw7mv.fsf@evledraar.gmail.com/
$pr->("$n\n") if $pr;
$regen_max += $n;
}
$pr->("$n\n") if $pr;
$regen_max += $n;
}
+
+ return 0 if (!$regen_max && !keys(%{$self->{unindex_range}}));
+
# reindex should NOT see new commits anymore, if we do,
# it's a problem and we need to notice it via die()
my $pad = length($regen_max) + 1;
# reindex should NOT see new commits anymore, if we do,
# it's a problem and we need to notice it via die()
my $pad = length($regen_max) + 1;
return unless defined $latest;
$self->idx_init($opt); # acquire lock
my $sync = {
return unless defined $latest;
$self->idx_init($opt); # acquire lock
my $sync = {
- mm_tmp => $self->{mm}->tmp_clone,
D => {}, # "$mid\0$cid" => $oid
unindex_range => {}, # EPOCH => oid_old..oid_new
reindex => $opt->{reindex},
D => {}, # "$mid\0$cid" => $oid
unindex_range => {}, # EPOCH => oid_old..oid_new
reindex => $opt->{reindex},
$sync->{ranges} = sync_ranges($self, $sync, $epoch_max);
$sync->{regen} = sync_prepare($self, $sync, $epoch_max);
$sync->{ranges} = sync_ranges($self, $sync, $epoch_max);
$sync->{regen} = sync_prepare($self, $sync, $epoch_max);
+ if ($sync->{regen}) {
+ # tmp_clone seems to fail if inside a transaction, so
+ # we rollback here (because we opened {mm} for reading)
+ # Note: we do NOT rely on DBI transactions for atomicity;
+ # only for batch performance.
+ $self->{mm}->{dbh}->rollback;
+ $self->{mm}->{dbh}->begin_work;
+ $sync->{mm_tmp} = $self->{mm}->tmp_clone;
+ }
+
# work backwards through history
for (my $i = $epoch_max; $i >= 0; $i--) {
index_epoch($self, $sync, $i);
# work backwards through history
for (my $i = $epoch_max; $i >= 0; $i--) {
index_epoch($self, $sync, $i);
$git->cleanup;
}
$self->done;
$git->cleanup;
}
$self->done;
- if (my $pr = $sync->{-opt}->{-progress}) {
- $pr->('all.git '.sprintf($sync->{-regen_fmt}, $sync->{nr}));
+
+ if (my $nr = $sync->{nr}) {
+ my $pr = $sync->{-opt}->{-progress};
+ $pr->('all.git '.sprintf($sync->{-regen_fmt}, $nr)) if $pr;
}
# reindex does not pick up new changes, so we rerun w/o it:
}
# reindex does not pick up new changes, so we rerun w/o it: