We need to hide removals from anybody hitting the search engine.
$sth->execute;
}
+sub num_delete {
+ my ($self, $num) = @_;
+ my $dbh = $self->{dbh};
+ my $sth = $dbh->prepare('DELETE FROM msgmap WHERE num = ?');
+ $sth->bind_param(1, $num);
+ $sth->execute;
+}
+
sub create_tables {
my ($dbh) = @_;
my $e;
}
}
+# MID is a hint in V2
+sub remove_by_oid {
+ my ($self, $oid, $mid) = @_;
+ my $db = $self->{xdb};
+
+ # XXX careful, we cannot use batch_do here since we conditionally
+ # delete documents based on other factors, so we cannot call
+ # find_doc_ids twice.
+ my ($head, $tail) = $self->find_doc_ids('Q' . $mid);
+ return if $head == $tail;
+
+ # there is only ONE element in @delete unless we
+ # have bugs in our v2writable deduplication check
+ my @delete;
+ for (; $head != $tail; $head->inc) {
+ my $docid = $head->get_docid;
+ my $doc = $db->get_document($docid);
+ my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
+ $smsg->load_expand;
+ push(@delete, $docid) if $smsg->{blob} eq $oid;
+ }
+ $db->delete_document($_) foreach @delete;
+ scalar(@delete);
+}
+
sub term_generator { # write-only
my ($self) = @_;
$? == 0 or die ref($self)." pid:$pid exited with: $?";
}
+# triggers remove_by_oid in partition or skeleton
+sub remote_remove {
+ my ($self, $oid, $mid) = @_;
+ print { $self->{w} } "D $oid $mid\n" or
+ die "failed to write remove $!";
+}
+
1;
$txn = undef;
print { $self->{skeleton}->{w} } "barrier $part\n" or
die "write failed to skeleton: $!\n";
+ } elsif ($line =~ /\AD ([a-f0-9]{40,}) (.+)\n\z/s) {
+ my ($oid, $mid) = ($1, $2);
+ $xdb ||= $self->_xdb_acquire;
+ if (!$txn) {
+ $xdb->begin_transaction;
+ $txn = 1;
+ }
+ $self->remove_by_oid($oid, $mid);
} else {
chomp $line;
my ($len, $artnum, $oid, $mid0) = split(/ /, $line);
print $barrier_note "barrier_done\n" or die
"print failed to barrier note: $!";
}
+ } elsif ($line =~ /\AD ([a-f0-9]{40,}) (.*)\n\z/s) {
+ my ($oid, $mid) = ($1, $2);
+ $xdb ||= $self->_xdb_acquire;
+ if (!$txn) {
+ $xdb->begin_transaction;
+ $txn = 1;
+ }
+ $self->remove_by_oid($oid, $mid);
} else {
my $len = int($line);
my $n = read($r, my $msg, $len) or die "read: $!\n";
die "print failed: $err\n" if $err;
}
+sub remote_remove {
+ my ($self, $oid, $mid) = @_;
+ my $err;
+ $self->_lock_acquire;
+ eval { $self->SUPER::remote_remove($oid, $mid) };
+ $err = $@;
+ $self->_lock_release;
+ die $err if $err;
+}
+
# values: [ TS, NUM, BYTES, LINES, MID, XPATH, doc_data ]
sub index_skeleton_real ($$) {
my ($self, $values) = @_;
# :bytes and :lines metadata in RFC 3977
sub bytes ($) { get_val($_[0]->{doc}, &PublicInbox::Search::BYTES) }
sub lines ($) { get_val($_[0]->{doc}, &PublicInbox::Search::LINES) }
-sub num ($) { get_val($_[0]->{doc}, &PublicInbox::Search::NUM) }
+sub num ($) {
+ $_[0]->{num} ||= get_val($_[0]->{doc}, PublicInbox::Search::NUM)
+}
sub __hdr ($$) {
my ($self, $field) = @_;
}
sub remove {
- my ($self, $mime, $msg) = @_;
- my $existing = $self->lookup_content($mime) or return;
-
- # don't touch ghosts or already junked messages
- return unless $existing->type eq 'mail';
-
- # always write removals to the current (latest) git repo since
- # we process chronologically
+ my ($self, $mime, $cmt_msg) = @_;
+ $self->barrier;
+ $self->idx_init;
my $im = $self->importer;
- my ($cmt, undef) = $im->remove($mime, $msg);
- $cmt = $im->get_mark($cmt);
- $self->unindex_msg($existing, $cmt);
+ my $ibx = $self->{-inbox};
+ my $srch = $ibx->search;
+ my $cid = content_id($mime);
+ my $skel = $self->{skel};
+ my $parts = $self->{idx_parts};
+ my $mm = $skel->{mm};
+ my $removed;
+ my $mids = mids($mime->header_obj);
+ foreach my $mid (@$mids) {
+ $srch->reopen->each_smsg_by_mid($mid, sub {
+ my ($smsg) = @_;
+ $smsg->load_expand;
+ my $msg = $ibx->msg_by_smsg($smsg);
+ if (!defined($msg)) {
+ warn "broken smsg for $mid\n";
+ return 1; # continue
+ }
+ my $cur = PublicInbox::MIME->new($msg);
+ if (content_id($cur) eq $cid) {
+ $mm->num_delete($smsg->num);
+ # $removed should only be set once assuming
+ # no bugs in our deduplication code:
+ $removed = $smsg;
+ $removed->{mime} = $cur;
+ $im->remove($cur, $cmt_msg);
+ $removed->num; # memoize this for callers
+
+ my $oid = $smsg->{blob};
+ foreach my $idx (@$parts, $skel) {
+ $idx->remote_remove($oid, $mid);
+ }
+ }
+ 1; # continue
+ });
+ $self->barrier;
+ }
+ $removed;
}
sub done {
};
is($im->{partitions}, 1, 'one partition when forced');
ok($im->add($mime), 'ordinary message added');
+my $git0;
if ('ensure git configs are correct') {
my @cmd = (qw(git config), "--file=$mainrepo/all.git/config",
qw(core.sharedRepository 0644));
is(system(@cmd), 0, "set sharedRepository in all.git");
- my $git0 = PublicInbox::Git->new("$mainrepo/git/0.git");
+ $git0 = PublicInbox::Git->new("$mainrepo/git/0.git");
my $fh = $git0->popen(qw(config core.sharedRepository));
my $v = eval { local $/; <$fh> };
chomp $v;
};
{
local $ENV{NPROC} = 2;
+ my @before = $git0->qx(qw(log --pretty=oneline));
$im = PublicInbox::V2Writable->new($ibx, 1);
is($im->{partitions}, 1, 'detected single partition from previous');
+ my $smsg = $im->remove($mime, 'test removal');
+ my @after = $git0->qx(qw(log --pretty=oneline));
+ $im->done;
+ my $tip = shift @after;
+ like($tip, qr/\A[a-f0-9]+ test removal\n\z/s,
+ 'commit message propaged to git');
+ is_deeply(\@after, \@before, 'only one commit written to git');
+ is($ibx->mm->num_for($smsg->mid), undef, 'no longer in Msgmap by mid');
+ like($smsg->num, qr/\A\d+\z/, 'numeric number in return message');
+ is($ibx->mm->mid_for($smsg->num), undef, 'no longer in Msgmap by num');
+ my $srch = $ibx->search->reopen;
+ my @found = ();
+ $srch->each_smsg_by_mid($smsg->mid, sub { push @found, @_; 1 });
+ is(scalar(@found), 0, 'no longer found in Xapian skeleton');
}
done_testing();