From dfed6cc6f2881c77478174dd5eb9b93352b1f1c1 Mon Sep 17 00:00:00 2001
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
Date: Mon, 19 Mar 2018 23:24:50 +0000
Subject: [PATCH] content_id: do not take Message-Id into account

By the time we need content_id, we have already given up on
Message-Id as a differentiator, so leave it out of the digest.
This prevents duplicates from showing up repeatedly with
-watch when Message-Ids are reused and we generate
new Message-Ids to disambiguate.
---
 lib/PublicInbox/ContentId.pm |  3 ++-
 t/v2writable.t               | 10 +++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/ContentId.pm b/lib/PublicInbox/ContentId.pm
index 9082b769..279eec0c 100644
--- a/lib/PublicInbox/ContentId.pm
+++ b/lib/PublicInbox/ContentId.pm
@@ -21,7 +21,8 @@ sub content_digest ($) {
 	# in SearchIdx, so treat them the same for this:
 	my %seen;
 	foreach my $mid (@{mids($hdr)}) {
-		$dig->add('mid: '.$mid);
+		# do NOT consider the Message-ID as part of the content_id:
+		# if we got here, we've already got Message-ID reuse
 		$seen{$mid} = 1;
 	}
 	foreach my $mid (@{references($hdr)}) {
diff --git a/t/v2writable.t b/t/v2writable.t
index 85b48d2a..6cabf0d5 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -61,11 +61,15 @@ if ('ensure git configs are correct') {
 
 	@warn = ();
 	$mime->header_set('Message-Id', '<a-mid@b>', '<c@d>');
-	ok($im->add($mime), 'secondary MID used');
+	is($im->add($mime), undef, 'secondary MID ignored if first matches');
+	my $sec = PublicInbox::MIME->new($mime->as_string);
+	$sec->header_set('Date');
+	$sec->header_set('Message-Id', '<a-mid@b>', '<c@d>');
+	ok($im->add($sec), 'secondary MID used if data is different');
 	like(join(' ', @warn), qr/mismatched/, 'warned about mismatch');
 	like(join(' ', @warn), qr/alternative/, 'warned about alternative');
 	is_deeply([ '<a-mid@b>', '<c@d>' ],
-		[ $mime->header_obj->header_raw('Message-Id') ],
+		[ $sec->header_obj->header_raw('Message-Id') ],
 		'no new Message-Id added');
 
 	my $sane_mid = qr/\A<[\w\-]+\@localhost>\z/;
@@ -85,7 +89,7 @@ if ('ensure git configs are correct') {
 	my $gen = PublicInbox::Import::digest2mid(content_digest($mime));
 	unlike($gen, qr![\+/=]!, 'no URL-unfriendly chars in Message-Id');
 	my $fake = PublicInbox::MIME->new($mime->as_string);
-	$fake->header_set('Message-Id', $gen);
+	$fake->header_set('Message-Id', "<$gen>");
 	ok($im->add($fake), 'fake added easily');
 	is_deeply(\@warn, [], 'no warnings from a faker');
 	ok($im->add($mime), 'random MID made');
-- 
2.51.0