total_bytes => 0,
current_info => '',
xpfx => $xpfx,
- over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3", 1),
+ over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"),
lock_path => "$dir/inbox.lock",
# limit each git repo (epoch) to 1GB or so
rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR),
- last_commit => [], # git repo -> commit
+ last_commit => [], # git epoch -> commit
};
+ $self->{over}->{-no_sync} = 1 if $v2ibx->{-no_sync};
$self->{shards} = count_shards($self) || nproc_shards($creat);
$self->{index_max_size} = $v2ibx->{index_max_size};
bless $self, $class;
# Now that all subprocesses are up, we can open the FDs
# for SQLite:
my $mm = $self->{mm} = PublicInbox::Msgmap->new_file(
- "$self->{ibx}->{inboxdir}/msgmap.sqlite3", 1);
+ "$self->{ibx}->{inboxdir}/msgmap.sqlite3",
+ $self->{ibx}->{-no_sync} ? 2 : 1);
$mm->{dbh}->begin_work;
}
# atfork_child($self)
# Forwards the atfork_child call to every element of $self->{idx_shards}
# (when that array ref is set) and returns $self->{bnote}->[1].
# NOTE(review): presumably invoked in a freshly forked child process, as
# the name suggests -- confirm against the callers.
# NOTE(review): this text is a diff excerpt: the lines prefixed with "-"
# (closing a leftover $self->{reindex_pipe} handle) are removed by the
# change, and further lines of this sub may be hidden between hunks.
sub atfork_child {
my ($self) = @_;
- my $fh = delete $self->{reindex_pipe};
- close $fh if $fh;
if (my $shards = $self->{idx_shards}) {
$_->atfork_child foreach @$shards;
}
$self->{bnote}->[1]; # last expression evaluated, so this is the return value
}
# reindex_checkpoint($self, $sync)
# Checkpoint taken during reindexing.  In the order visible here it:
#   1. calls atfork_prepare on $sync->{mm_tmp},
#   2. calls $self->done,
#   3. if $sync->{-opt}->{-progress} holds a callback, passes it the
#      string built from sprintf($sync->{-regen_fmt}, $sync->{nr}),
#   4. calls atfork_parent on $sync->{mm_tmp}.
# The change recorded by the "-"/"+" lines drops the third ($git)
# argument: the old version called $git->cleanup first and prefixed the
# progress string with the last path component of $git->{git_dir}.
# NOTE(review): lines of this sub may be hidden between diff hunks (for
# example whatever reacquires the lock released by $self->done) -- verify
# against the full file.
-sub reindex_checkpoint ($$$) {
- my ($self, $sync, $git) = @_;
+sub reindex_checkpoint ($$) {
+ my ($self, $sync) = @_;
- $git->cleanup;
$sync->{mm_tmp}->atfork_prepare;
$self->done; # release lock
if (my $pr = $sync->{-opt}->{-progress}) {
- my ($bn) = (split('/', $git->{git_dir}))[-1];
- $pr->("$bn ".sprintf($sync->{-regen_fmt}, $sync->{nr}));
+ $pr->(sprintf($sync->{-regen_fmt}, $sync->{nr}));
}
# allow -watch or -mda to write...
$sync->{mm_tmp}->atfork_parent;
}
# reindex_oid($self, $sync, $oid)
# Re-indexes the message blob identified by $oid.  Visible steps:
#   - returns early when PublicInbox::SearchIdx::too_big($self, $oid)
#     is true,
#   - reads the blob with cat_file, which also fills $len; returns early
#     when $len is 0,
#   - parses the blob into a PublicInbox::Eml and collects its
#     Message-IDs via mids(),
#   - populates $smsg from the message and passes it to do_idx(); a true
#     result from do_idx() triggers reindex_checkpoint().
# The change recorded by the "-"/"+" lines removes the $git parameter and
# reads the blob through $self->{ibx}->git instead.
-sub reindex_oid ($$$$) {
- my ($self, $sync, $git, $oid) = @_;
+sub reindex_oid ($$$) {
+ my ($self, $sync, $oid) = @_;
return if PublicInbox::SearchIdx::too_big($self, $oid);
my ($num, $mid0, $len);
- my $msgref = $git->cat_file($oid, \$len);
+ my $msgref = $self->{ibx}->git->cat_file($oid, \$len);
return if $len == 0; # purged
my $mime = PublicInbox::Eml->new($$msgref);
my $mids = mids($mime->header_obj);
# NOTE(review): lines are missing from this excerpt at this point (diff
# hunk boundary); the code that assigns $num/$mid0 and opens the
# construct closed on the next line (where $smsg comes from) is not
# visible -- consult the full file.
}, 'PublicInbox::Smsg';
$smsg->populate($mime, $sync);
if (do_idx($self, $msgref, $mime, $smsg)) {
- reindex_checkpoint($self, $sync, $git);
+ reindex_checkpoint($self, $sync);
}
}
my $reindex_heads = last_commits($self, $epoch_max) if $sync->{reindex};
for (my $i = $epoch_max; $i >= 0; $i--) {
- die 'BUG: already indexing!' if $self->{reindex_pipe};
my $git_dir = git_dir_n($self, $i);
-d $git_dir or next; # missing epochs are fine
my $git = PublicInbox::Git->new($git_dir);
# our code and blindly injects "d" file history into git repos
if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
- my $git = $self->{ibx}->git;
for my $oid (@leftovers) {
$oid = unpack('H*', $oid);
$self->{current_info} = "leftover $oid";
- unindex_oid($self, $git, $oid);
+ unindex_oid($self, $oid);
}
- $git->cleanup;
}
return 0 if (!$regen_max && !keys(%{$self->{unindex_range}}));
}
}
# unindex_oid($self, $oid [, $unindexed])
# Visible steps: reads the blob $oid with cat_file, parses it into a
# PublicInbox::Eml, collects its Message-IDs via mids(), then clears
# $mime and $msgref.  $unindexed is optional per the prototype.
# The change recorded by the "-"/"+" lines removes the $git parameter and
# reads the blob through $self->{ibx}->git instead.
# NOTE(review): presumably the hidden remainder removes the message from
# the indices, as the name suggests -- confirm in the full file.
-sub unindex_oid ($$$;$) {
- my ($self, $git, $oid, $unindexed) = @_;
+sub unindex_oid ($$;$) {
+ my ($self, $oid, $unindexed) = @_;
my $mm = $self->{mm};
- my $msgref = $git->cat_file($oid);
+ my $msgref = $self->{ibx}->git->cat_file($oid);
my $mime = PublicInbox::Eml->new($msgref);
my $mids = mids($mime->header_obj);
# only $mids is kept from the parsed message; both references to the
# message itself are dropped here
$mime = $msgref = undef;
# NOTE(review): lines are missing from this excerpt at this point (diff
# hunk boundary); the first closing brace below ends a construct whose
# opening is not visible.
}
}
+# This is rare; it only happens when we get discontiguous history in
+# a mirror because the source used -purge or -edit.
sub unindex ($$$$) {
my ($self, $sync, $git, $unindex_range) = @_;
my $unindexed = $self->{unindexed} ||= {}; # $mid0 => $num
# order does not matter, here:
my @cmd = qw(log --raw -r
--no-notes --no-color --no-abbrev --no-renames);
- my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $unindex_range);
+ my $fh = $git->popen(@cmd, $unindex_range);
while (<$fh>) {
/\A:\d{6} 100644 $OID ($OID) [AM]\tm$/o or next;
- unindex_oid($self, $git, $1, $unindexed);
+ unindex_oid($self, $1, $unindexed);
}
- delete $self->{reindex_pipe};
close $fh or die "git log failed: \$?=$?";
return unless $sync->{-opt}->{prune};
my ($self, $sync, $i) = @_;
my $git_dir = git_dir_n($self, $i);
- die 'BUG: already reindexing!' if $self->{reindex_pipe};
-d $git_dir or return; # missing epochs are fine
my $git = PublicInbox::Git->new($git_dir);
- if (my $unindex_range = delete $sync->{unindex_range}->{$i}) {
+ if (my $unindex_range = delete $sync->{unindex_range}->{$i}) { # rare
unindex($self, $sync, $git, $unindex_range);
}
defined(my $stk = $sync->{stacks}->[$i]) or return;
if ($f eq 'm') {
$sync->{autime} = $at;
$sync->{cotime} = $ct;
- reindex_oid($self, $sync, $git, $oid);
+ reindex_oid($self, $sync, $oid);
} elsif ($f eq 'd') {
- unindex_oid($self, $git, $oid);
+ unindex_oid($self, $oid);
}
}
delete @$sync{qw(autime cotime)};