From 325fbe26c3e7731eb53bea7f2ce2627c8828e0d8 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Mon, 18 Oct 2021 05:09:05 +0000
Subject: [PATCH] v2: mirrors don't clobber msgs w/ reused Message-IDs

For odd messages with reused Message-IDs, the second message
showing up in a mirror (via git-fetch + -index) should never
clobber an entry with a different blob in over.

This is noticeable only if the messages arrive in-between
indexing runs.

Fixes: 4441a38481ed ("v2: index forwards (via `git log --reverse')")
---
 MANIFEST                      |  1 +
 lib/PublicInbox/V2Writable.pm |  7 ++++++-
 t/v2index-late-dupe.t         | 37 +++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 t/v2index-late-dupe.t

diff --git a/MANIFEST b/MANIFEST
index b5aae777..af1522d7 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -552,6 +552,7 @@ t/v1-add-remove-add.t
 t/v1reindex.t
 t/v2-add-remove-add.t
 t/v2dupindex.t
+t/v2index-late-dupe.t
 t/v2mda.t
 t/v2mirror.t
 t/v2reindex.t
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 3914383c..ed5182ae 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -813,8 +813,8 @@ sub index_oid { # cat_async callback
 			}
 		}
 	}
+	my $oidx = $self->{oidx};
 	if (!defined($num)) { # reuse if reindexing (or duplicates)
-		my $oidx = $self->{oidx};
 		for my $mid (@$mids) {
 			($num, $mid0) = $oidx->num_mid0_for_oid($oid, $mid);
 			last if defined $num;
@@ -822,6 +822,11 @@ sub index_oid { # cat_async callback
 	}
 	$mid0 //= do { # is this a number we got before?
 		$num = $arg->{mm_tmp}->num_for($mids->[0]);
+
+		# don't clobber existing if Message-ID is reused:
+		if (my $x = defined($num) ? $oidx->get_art($num) : undef) {
+			undef($num) if $x->{blob} ne $oid;
+		}
 		defined($num) ? $mids->[0] : undef;
 	};
 	if (!defined($num)) {
diff --git a/t/v2index-late-dupe.t b/t/v2index-late-dupe.t
new file mode 100644
index 00000000..c83e3409
--- /dev/null
+++ b/t/v2index-late-dupe.t
@@ -0,0 +1,37 @@
+# Copyright (C) all contributors
+# License: AGPL-3.0+
+#
+# this simulates a mirror path: git fetch && -index
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+use Test::More; # redundant, used for bisect
+require_mods 'v2';
+require PublicInbox::Import;
+require PublicInbox::Inbox;
+require PublicInbox::Git;
+my ($tmpdir, $for_destroy) = tmpdir();
+my $inboxdir = "$tmpdir/i";
+PublicInbox::Import::init_bare(my $e0 = "$inboxdir/git/0.git");
+open my $fh, '>', "$inboxdir/inbox.lock" or xbail $!;
+my $git = PublicInbox::Git->new($e0);
+my $im = PublicInbox::Import->new($git, qw(i i@example.com));
+$im->{lock_path} = undef;
+$im->{path_type} = 'v2';
+my $eml = eml_load('t/plack-qp.eml');
+ok($im->add($eml), 'add original');
+$im->done;
+run_script([qw(-index -Lbasic), $inboxdir]);
+is($?, 0, 'basic index');
+my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir });
+my $orig = $ibx->over->get_art(1);
+
+my @mid = $eml->header_raw('Message-ID');
+$eml->header_set('Message-ID', @mid, '');
+ok($im->add($eml), 'add another');
+$im->done;
+run_script([qw(-index -Lbasic), $inboxdir]);
+is($?, 0, 'basic index again');
+
+my $after = $ibx->over->get_art(1);
+is_deeply($after, $orig, 'original unchanged') or note explain([$orig,$after]);
+
+done_testing;
-- 
2.44.0