Sergey Matveev's repositories - public-inbox.git/commitdiff
v2writable: warn on duplicate Message-IDs
author Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Thu, 22 Feb 2018 19:10:31 +0000 (19:10 +0000)
committer Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Thu, 22 Feb 2018 21:18:15 +0000 (21:18 +0000)
This should give us an idea of how much of a problem deduplication
will be.

lib/PublicInbox/SearchIdx.pm
lib/PublicInbox/V2Writable.pm

index cc7e7ec922ce812da43c0a6b5a323f7700e663cb..f9207e9464cd1dd3afe89b3b1052635db1682ea5 100644 (file)
@@ -515,13 +515,15 @@ sub unindex_blob {
 }
 
 sub index_mm {
-       my ($self, $mime) = @_;
+       my ($self, $mime, $warn_existing) = @_;
        my $mid = mid_clean(mid_mime($mime));
        my $mm = $self->{mm};
        my $num = $mm->mid_insert($mid);
+       return $num if defined $num;
 
+       warn "<$mid> reused\n" if $warn_existing;
        # fallback to num_for since filters like RubyLang set the number
-       defined $num ? $num : $mm->num_for($mid);
+       $mm->num_for($mid);
 }
 
 sub unindex_mm {
index cf19c7610cb775f1cb09d5c985442046996ce52a..29ed23caaa76fb87fcce3e48caeb4ec7c7f26a23 100644 (file)
@@ -63,7 +63,7 @@ sub add {
        my ($len, $msgref) = @{$im->{last_object}};
 
        $self->idx_init;
-       my $num = $self->{all}->index_mm($mime);
+       my $num = $self->{all}->index_mm($mime, 1);
        my $nparts = $self->{partitions};
        my $part = $num % $nparts;
        my $idx = $self->idx_part($part);