From f6285ab9d73a4eae490dda325096e61eadc415cd Mon Sep 17 00:00:00 2001
From: "Eric Wong (Contractor, The Linux Foundation)"
Date: Thu, 22 Mar 2018 18:21:54 +0000
Subject: [PATCH] import: consolidate mid prepend logic, here

This also quiets down warnings from -watch when spam training
happens on messages without Message-Id.
---
 lib/PublicInbox/Import.pm     | 31 ++++++++++++++++++++++++-------
 lib/PublicInbox/V2Writable.pm |  3 +--
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 5d116a1c..6824faca 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -203,7 +203,7 @@ sub remove {
 	my ($r, $w) = $self->gfi_start;
 	my $tip = $self->{tip};
 	if ($path_type eq '2/38') {
-		$path = mid2path(mid_mime($mime));
+		$path = mid2path(v1_mid0($mime));
 		($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime);
 		return ($err, $cur) if $err;
 	} else {
@@ -296,6 +296,28 @@ sub drop_unwanted_headers ($) {
 	$mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS;
 }
 
+# used by V2Writable, too
+sub prepend_mid ($$) {
+	my ($hdr, $mid0) = @_;
+	# @cur is likely empty if we need to call this sub, but it could
+	# have random unparseable crap which we'll preserve, too.
+	my @cur = $hdr->header_raw('Message-Id');
+	$hdr->header_set('Message-Id', "<$mid0>", @cur);
+}
+
+sub v1_mid0 ($) {
+	my ($mime) = @_;
+	my $hdr = $mime->header_obj;
+	my $mids = mids($hdr);
+
+	if (!scalar(@$mids)) { # spam often has no Message-Id
+		my $mid0 = digest2mid(content_digest($mime));
+		prepend_mid($hdr, $mid0);
+		return $mid0;
+	}
+	$mids->[0];
+}
+
 # returns undef on duplicate
 # returns the :MARK of the most recent commit
 sub add {
@@ -313,12 +335,7 @@ sub add {
 
 	my $path;
 	if ($path_type eq '2/38') {
-		my $mids = mids($mime->header_obj);
-		if (!scalar(@$mids)) {
-			my $dig = content_digest($mime);
-			@$mids = (digest2mid($dig));
-		}
-		$path = mid2path($mids->[0]);
+		$path = mid2path(v1_mid0($mime));
 	} else { # v2 layout, one file:
 		$path = 'm';
 	}
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 20c27363..b04e6fbf 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -173,8 +173,7 @@ sub num_for_harder {
 			$num = $self->{skel}->{mm}->mid_insert($$mid0);
 		}
 	}
-	my @cur = $hdr->header_raw('Message-Id');
-	$hdr->header_set('Message-Id', "<$$mid0>", @cur);
+	PublicInbox::Import::prepend_mid($hdr, $$mid0);
 	$num;
 }
 
-- 
2.44.0