From: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Date: Thu, 22 Feb 2018 19:10:31 +0000 (+0000)
Subject: v2writable: warn on duplicate Message-IDs
X-Git-Tag: v1.1.0-pre1~225
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=61ecf6a904b868ce791115231b11859d725c6113;p=public-inbox.git

v2writable: warn on duplicate Message-IDs

This should give us an idea of how much of a problem deduplication
will be.
---

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index cc7e7ec9..f9207e94 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -515,13 +515,15 @@ sub unindex_blob {
 }
 
 sub index_mm {
-	my ($self, $mime) = @_;
+	my ($self, $mime, $warn_existing) = @_;
 	my $mid = mid_clean(mid_mime($mime));
 	my $mm = $self->{mm};
 	my $num = $mm->mid_insert($mid);
+	return $num if defined $num;
 
+	warn "<$mid> reused\n" if $warn_existing;
 	# fallback to num_for since filters like RubyLang set the number
-	defined $num ? $num : $mm->num_for($mid);
+	$mm->num_for($mid);
 }
 
 sub unindex_mm {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index cf19c761..29ed23ca 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -63,7 +63,7 @@ sub add {
 	my ($len, $msgref) = @{$im->{last_object}};
 
 	$self->idx_init;
-	my $num = $self->{all}->index_mm($mime);
+	my $num = $self->{all}->index_mm($mime, 1);
 	my $nparts = $self->{partitions};
 	my $part = $num % $nparts;
 	my $idx = $self->idx_part($part);