};
# crap, Message-ID is already known, hope somebody just resent:
- $self->done; # write barrier, clears $self->{skel}
+ $self->barrier;
foreach my $m (@$mids) {
# read-only lookup now safe to do after above barrier
my $existing = $self->lookup_content($mime, $m);
# very unlikely:
warn "<$mid> reused for mismatched content\n";
- $self->idx_init;
# try the rest of the mids
foreach my $i (1..$#$mids) {
}
# Remove the message(s) whose content matches $mime from git and the indices.
# $mime: message object; the Message-IDs extracted from its headers select
# the candidates and its content_id decides which candidate really matches.
# $cmt_msg: handed through unchanged to the importer's ->remove call.
# Returns the last matching smsg (with {mime} populated), or undef if no
# stored message had matching content.
sub remove {
- my ($self, $mime, $msg) = @_;
- my $existing = $self->lookup_content($mime) or return;
-
- # don't touch ghosts or already junked messages
- return unless $existing->type eq 'mail';
-
- # always write removals to the current (latest) git repo since
- # we process chronologically
+ my ($self, $mime, $cmt_msg) = @_;
+ $self->barrier; # flush pending writes so the read-only lookups below see them
+ $self->idx_init; # NOTE(review): presumably sets up {skel}/{idx_parts} read below -- confirm
my $im = $self->importer;
- my ($cmt, undef) = $im->remove($mime, $msg);
- $cmt = $im->get_mark($cmt);
- $self->unindex_msg($existing, $cmt);
+ my $ibx = $self->{-inbox};
+ my $srch = $ibx->search;
+ my $cid = content_id($mime); # content identity we were asked to remove
+ my $skel = $self->{skel};
+ my $parts = $self->{idx_parts};
+ my $mm = $skel->{mm};
+ my $removed; # last matching smsg; stays undef when nothing matches
+ my $mids = mids($mime->header_obj); # array ref of candidate Message-IDs
+ foreach my $mid (@$mids) {
+ $srch->reopen->each_smsg_by_mid($mid, sub { # callback is invoked per smsg found for $mid
+ my ($smsg) = @_;
+ $smsg->load_expand;
+ my $msg = $ibx->msg_by_smsg($smsg); # scalar ref to the raw message, or undef
+ if (!defined($msg)) {
+ warn "broken smsg for $mid\n";
+ return 1; # continue
+ }
+ my $orig = $$msg; # private copy; NOTE(review): presumably MIME->new may alter the buffer behind $msg -- confirm
+ my $cur = PublicInbox::MIME->new($msg);
+ if (content_id($cur) eq $cid) { # same Message-ID alone is not enough, content must match
+ $mm->num_delete($smsg->num); # drop the article number from $skel->{mm}
+ # $removed should only be set once assuming
+ # no bugs in our deduplication code:
+ $removed = $smsg;
+ $removed->{mime} = $cur;
+ $im->remove(\$orig, $cmt_msg); # importer gets the untouched raw copy
+ $orig = undef; # release the copy as soon as it is no longer needed
+ $removed->num; # memoize this for callers
+
+ my $oid = $smsg->{blob};
+ foreach my $idx (@$parts, $skel) { # every partition plus the skeleton index
+ $idx->remote_remove($oid, $mid);
+ }
+ }
+ 1; # continue
+ });
+ $self->barrier; # publish this Message-ID's removals before handling the next one
+ }
+ $removed; # implicit return value
}
sub done {
$self->searchidx_checkpoint(1);
}
+# Issue a write barrier so that all pending data becomes visible to other
+# processes and read-only ops. Flush order follows data importance: git > SQLite > Xapian
+sub barrier {
+ my ($self) = @_;
+
+ # For safety, we make sure the git checkpoint is complete first, because
+ # the data in git is still more important than what is in Xapian.
+ # Performance may be gained by delaying the ->progress call, but we
+ # would lose safety.
+ if (my $im = $self->{im}) { # only when an importer object is currently cached
+ $im->checkpoint;
+ $im->progress('checkpoint'); # NOTE(review): presumably blocks until the checkpoint is acknowledged -- confirm
+ }
+ my $skel = $self->{skel};
+ my $parts = $self->{idx_parts};
+ if ($parts && $skel) { # index state is only synced when both are set
+ my $dbh = $skel->{mm}->{dbh};
+ $dbh->commit; # SQLite data is second in importance
+
+ # Now deal with Xapian
+ $skel->barrier_init(scalar(@$parts)); # tell skel how many partition barriers to expect
+ # each partition needs to issue a barrier command to skel:
+ $_->barrier foreach @$parts;
+
+ $skel->barrier_wait; # wait for each Xapian partition
+
+ $dbh->begin_work; # open a fresh transaction on the handle committed above
+ }
+ $self->{transact_bytes} = 0; # reset the byte counter now that everything is flushed
+}
+
sub searchidx_checkpoint {
my ($self, $more) = @_;
my ($self, $mime, $mid) = @_;
my $ibx = $self->{-inbox};
- my $srch = $ibx->search;
+ my $srch = $ibx->search->reopen;
my $cid = content_id($mime);
my $found;
$srch->each_smsg_by_mid($mid, sub {