# indexes a message, returns true if checkpointing is needed
#
# Arguments (in order): $self, $msgref, $mime, $len, $num, $oid, $mid0.
# The message is recorded through $self->{over}->add_overview and through
# index_raw on the object returned by idx_shard.
# Side effect: $self->{transact_bytes} grows by $len on every call.
sub do_idx ($$$$$$$) {
my ($self, $msgref, $mime, $len, $num, $oid, $mid0) = @_;
# overview entry: the newer form packs $len/$num/$oid/$mid0 into a hash
# blessed as PublicInbox::Smsg (keys bytes/num/blob/mid) and also hands
# $self to add_overview
- $self->{over}->add_overview($mime, $len, $num, $oid, $mid0);
+ my $smsg = bless {
+ bytes => $len,
+ num => $num,
+ blob => $oid,
+ mid => $mid0,
+ }, 'PublicInbox::Smsg';
+ $self->{over}->add_overview($mime, $smsg, $self);
# idx_shard is given $num modulo the shard count
# NOTE(review): presumably this selects the shard for $num -- confirm
my $idx = idx_shard($self, $num % $self->{shards});
- $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime);
+ $idx->index_raw($len, $msgref, $num, $oid, $mid0, $mime, $self);
# running total kept in $self->{transact_bytes}; $n is the updated total
my $n = $self->{transact_bytes} += $len;
# last expression is the return value: true once the running total reaches
# BATCH_BYTES multiplied by the number of shards
$n >= (PublicInbox::SearchIdx::BATCH_BYTES * $self->{shards});
}
defined $num or return; # duplicate
# $mid0 is undef whenever this die fires, so interpolating it would only
# yield an empty string (plus an "uninitialized value" warning); escape
# the sigil so the message names the offending variable instead.
defined $mid0 or die "BUG: \$mid0 undefined\n";
my $im = $self->importer;
- my $cmt = $im->add($mime);
+ my $cmt = $im->add($mime, undef, $self); # sets $self->{(au|co)time}
$cmt = $im->get_mark($cmt);
$self->{last_commit}->[$self->{epoch_max}] = $cmt;
# crap, Message-ID is already known, hope somebody just resent:
foreach my $m (@$mids) {
# read-only lookup now safe to do after above barrier
- my $existing = lookup_content($self, $mime, $m);
# easy, don't store duplicates
# note: do not add more diagnostic info here since
# it gets noisy on public-inbox-watch restarts
- return () if $existing;
+ return () if content_exists($self, $mime, $m);
}
# AltId may pre-populate article numbers (e.g. X-Mail-Count
$ibx->msg_by_smsg($smsg);
}
-sub lookup_content ($$$) {
+sub content_exists ($$$) {
my ($self, $mime, $mid) = @_;
my $over = $self->{over};
my $cids = content_ids($mime);
next;
}
my $cur = PublicInbox::MIME->new($msg);
- if (content_matches($cids, $cur)) {
- $smsg->{mime} = $cur;
- return $smsg;
- }
-
+ return 1 if content_matches($cids, $cur);
# XXX DEBUG_DIFF is experimental and may be removed
diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF};
$pr->("$i.git indexing $range\n");
}
- my @cmd = qw(log --raw -r --pretty=tformat:%H
+ my @cmd = qw(log --raw -r --pretty=tformat:%H.%at.%ct
--no-notes --no-color --no-abbrev --no-renames);
my $fh = $self->{reindex_pipe} = $git->popen(@cmd, $range);
my $cmt;
while (<$fh>) {
chomp;
$self->{current_info} = "$i.git $_";
- if (/\A$x40$/o && !defined($cmt)) {
- $cmt = $_;
+ if (/\A($x40)\.([0-9]+)\.([0-9]+)$/o) {
+ $cmt //= $1;
+ $self->{autime} = $2;
+ $self->{cotime} = $3;
} elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\tm$/o) {
reindex_oid($self, $sync, $git, $1);
} elsif (/\A:\d{6} 100644 $x40 ($x40) [AM]\td$/o) {