From: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Date: Mon, 19 Mar 2018 08:14:46 +0000 (+0000)
Subject: import: force Message-ID generation for v1 here
X-Git-Tag: v1.1.0-pre1~156
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=12c3eb5b2b0860292a32d5743ea0157996e9a4b9;p=public-inbox.git

import: force Message-ID generation for v1 here

This allows us to share code for generating Message-IDs
between v1 and v2 repos.

For v1, this introduces a slight incompatibility in message
removal iff the original message lacked a Message-ID AND
the training request came from a message which did not
pass through the public-inbox.

The workaround for this would be to reuse the bad message from
the archive itself.
---

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 12df7d59..4c007b61 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -9,9 +9,10 @@ use strict;
 use warnings;
 use Fcntl qw(:flock :DEFAULT);
 use PublicInbox::Spawn qw(spawn);
-use PublicInbox::MID qw(mid_mime mid2path);
+use PublicInbox::MID qw(mids mid_mime mid2path);
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp);
+use PublicInbox::ContentId qw(content_digest);
 
 sub new {
 	my ($class, $git, $name, $email, $ibx) = @_;
@@ -308,7 +309,12 @@ sub add {
 
 	my $path;
 	if ($path_type eq '2/38') {
-		$path = mid2path(mid_mime($mime));
+		my $mids = mids($mime->header_obj);
+		if (!scalar(@$mids)) {
+			my $dig = content_digest($mime);
+			@$mids = (digest2mid($dig));
+		}
+		$path = mid2path($mids->[0]);
 	} else { # v2 layout, one file:
 		$path = 'm';
 	}
@@ -393,6 +399,11 @@ sub atfork_child {
 	}
 }
 
+sub digest2mid { # fallback Message-ID from a digest; no prototype, add() calls this before it is declared
+	my ($dig) = @_; # digest object, e.g. the result of content_digest($mime)
+	return $dig->clone->hexdigest . '@localhost'; # clone: keep $dig usable, callers may add() more data
+}
+
 1;
 __END__
 =pod
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index fbc71c89..a305842e 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -146,19 +146,19 @@ sub num_for_harder {
 
 	my $hdr = $mime->header_obj;
 	my $dig = content_digest($mime);
-	$$mid0 = $dig->clone->hexdigest . '@localhost';
+	$$mid0 = PublicInbox::Import::digest2mid($dig);
 	my $num = $self->{skel}->{mm}->mid_insert($$mid0);
 	unless (defined $num) {
 		# it's hard to spoof the last Received: header
 		my @recvd = $hdr->header_raw('Received');
 		$dig->add("Received: $_") foreach (@recvd);
-		$$mid0 = $dig->clone->hexdigest . '@localhost';
+		$$mid0 = PublicInbox::Import::digest2mid($dig);
 		$num = $self->{skel}->{mm}->mid_insert($$mid0);
 
 		# fall back to a random Message-ID and give up determinism:
 		until (defined($num)) {
 			$dig->add(rand);
-			$$mid0 = $dig->clone->hexdigest . '@localhost';
+			$$mid0 = PublicInbox::Import::digest2mid($dig);
 			warn "using random Message-ID <$$mid0> as fallback\n";
 			$num = $self->{skel}->{mm}->mid_insert($$mid0);
 		}
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index f2d3db95..3adebdde 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -12,8 +12,6 @@ use PublicInbox::Import;
 use PublicInbox::MDA;
 use PublicInbox::Spawn qw(spawn);
 use File::Temp qw//;
-use PublicInbox::MID qw(mids);
-use PublicInbox::ContentId qw(content_digest);
 
 sub new {
 	my ($class, $config) = @_;
@@ -127,7 +125,6 @@ sub _remove_spam {
 	# path must be marked as (S)een
 	$path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return;
 	my $mime = _path_to_mime($path) or return;
-	_force_mid($mime);
 	$self->{config}->each_inbox(sub {
 		my ($ibx) = @_;
 		eval {
@@ -146,16 +143,6 @@ sub _remove_spam {
 	})
 }
 
-sub _force_mid {
-	my ($mime) = @_;
-	my $hdr = $mime->header_obj;
-	my $mids = mids($hdr);
-	return if @$mids;
-	my $dig = content_digest($mime);
-	my $mid = $dig->clone->hexdigest . '@localhost';
-	$hdr->header_set('Message-Id', $mid);
-}
-
 sub _try_path {
 	my ($self, $path) = @_;
 	my @p = split(m!/+!, $path);
@@ -191,7 +178,6 @@ sub _try_path {
 		$mime = $ret;
 	}
 
-	_force_mid($mime);
 	$im->add($mime, $self->{spamcheck});
 }