Sergey Matveev's repositories - public-inbox.git/commitdiff
import: (v2) delete writes the blob into history in subdir
author Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Mon, 19 Mar 2018 08:14:41 +0000 (08:14 +0000)
committer Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Mon, 19 Mar 2018 08:16:34 +0000 (08:16 +0000)
This makes it easier to audit deletes with "git log -p" and
prevents an unstable specification of "content_id" from being
stored in history.

This should be cost-free if done in the same partition (and even
cheaper than before as it introduces no new blobs).  It does
have a higher cost across partitions, but is probably irrelevant
given the typical ham:spam ratio.

lib/PublicInbox/Import.pm
lib/PublicInbox/V2Writable.pm
t/v2writable.t

index e20c6e031f6fb14a64373637a631f8e1ff9edc93..94a49fe666f755a1b6da154548a2ea1acc9fe6de 100644 (file)
@@ -11,7 +11,6 @@ use Fcntl qw(:flock :DEFAULT);
 use PublicInbox::Spawn qw(spawn);
 use PublicInbox::MID qw(mid_mime mid2path);
 use PublicInbox::Address;
-use PublicInbox::ContentId qw(content_id);
 use PublicInbox::MsgTime qw(msg_timestamp);
 
 sub new {
@@ -163,7 +162,6 @@ sub get_mark {
 # ('MISMATCH', Email::MIME) on mismatch
 # (:MARK, Email::MIME) on success
 #
-# For v2 inboxes, the content_id is returned instead of the msg
 # v2 callers should check with Xapian before calling this as
 # it is not idempotent.
 sub remove {
@@ -179,10 +177,17 @@ sub remove {
                ($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime);
                return ($err, $cur) if $err;
        } else {
-               $cur = content_id($mime);
-               my $len = length($cur);
+               my $sref;
+               if (ref($mime) eq 'SCALAR') { # optimization used by V2Writable
+                       $sref = $mime;
+               } else { # XXX should not be necessary:
+                       my $str = $mime->as_string;
+                       $sref = \$str;
+               }
+               my $len = length($$sref);
                $blob = $self->{mark}++;
-               print $w "blob\nmark :$blob\ndata $len\n$cur\n" or wfail;
+               print $w "blob\nmark :$blob\ndata $len\n",
+                       $$sref, "\n" or wfail;
        }
 
        my $ref = $self->{ref};
index 656f0693a261f7dd92800557fd2f9c1ae30e2409..fd9bf615c66da24d88abd7bf2b091cd561dbf85a 100644 (file)
@@ -220,6 +220,7 @@ sub remove {
                                warn "broken smsg for $mid\n";
                                return 1; # continue
                        }
+                       my $orig = $$msg;
                        my $cur = PublicInbox::MIME->new($msg);
                        if (content_id($cur) eq $cid) {
                                $mm->num_delete($smsg->num);
@@ -227,7 +228,8 @@ sub remove {
                                # no bugs in our deduplication code:
                                $removed = $smsg;
                                $removed->{mime} = $cur;
-                               $im->remove($cur, $cmt_msg);
+                               $im->remove(\$orig, $cmt_msg);
+                               $orig = undef;
                                $removed->num; # memoize this for callers
 
                                my $oid = $smsg->{blob};
index 6e37b722aa19496ce4b90ed22f804696618c8cf5..a5c982e90be592c735870e28f11aeb4e41f1e1da 100644 (file)
@@ -191,6 +191,7 @@ EOF
 {
        local $ENV{NPROC} = 2;
        my @before = $git0->qx(qw(log --pretty=oneline));
+       my $before = $git0->qx(qw(log --pretty=raw --raw -r --no-abbrev));
        $im = PublicInbox::V2Writable->new($ibx, 1);
        is($im->{partitions}, 1, 'detected single partition from previous');
        my $smsg = $im->remove($mime, 'test removal');
@@ -207,6 +208,14 @@ EOF
        my @found = ();
        $srch->each_smsg_by_mid($smsg->mid, sub { push @found, @_; 1 });
        is(scalar(@found), 0, 'no longer found in Xapian skeleton');
+
+       my $after = $git0->qx(qw(log -1 --pretty=raw --raw -r --no-abbrev));
+       if ($after =~ m!( [a-f0-9]+ )A\td$!) {
+               my $oid = $1;
+               ok(index($before, $oid) > 0, 'no new blob introduced');
+       } else {
+               fail('failed to extract blob from log output');
+       }
 }
 
 done_testing();