From: Eric Wong Date: Sat, 3 Apr 2021 10:48:26 +0000 (+0000) Subject: lei: improve handling of Message-ID-less draft messages X-Git-Tag: v1.7.0~827 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=b3e2975029ae938bb232aaa0cbc3dabda55d57d6 lei: improve handling of Message-ID-less draft messages We need a stable fallback time for digest2mid in the presence of messages without Received/Date headers. Furthermore, we must avoid using uninitialized smsg->{mid} when parsing References for draft replies. --- diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 34738279..46f57e27 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -510,8 +510,8 @@ sub atfork_child { } } -sub digest2mid ($$) { - my ($dig, $hdr) = @_; +sub digest2mid ($$;$) { + my ($dig, $hdr, $fallback_time) = @_; my $b64 = $dig->clone->b64digest; # Make our own URLs nicer: # See "Base 64 Encoding with URL and Filename Safe Alphabet" in RFC4648 @@ -520,7 +520,7 @@ sub digest2mid ($$) { # Add a date prefix to prevent a leading '-' in case that trips # up some tools (e.g. if a Message-ID were a expected as a # command-line arg) - my $dt = msg_datestamp($hdr); + my $dt = msg_datestamp($hdr, $fallback_time); $dt = POSIX::strftime('%Y%m%d%H%M%S', gmtime($dt)); "$dt.$b64" . '@z'; } diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 69ba8303..148aa185 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -58,7 +58,7 @@ sub content_key ($) { qw(Message-ID X-Alt-Message-ID Resent-Message-ID)); unless (@$mids) { $eml->{-lei_fake_mid} = $mids->[0] = - PublicInbox::Import::digest2mid($dig, $eml); + PublicInbox::Import::digest2mid($dig, $eml, 0); } ($chash, $mids); } diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index e1cd31b9..66dec099 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -264,8 +264,10 @@ sub add_overview { $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; my $mids = mids_for_index($eml); my $refs = $smsg->parse_references($eml, $mids); - $mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid}; - $smsg->{mid} //= ''; + $mids->[0] //= do { + $smsg->{mid} //= ''; + $eml->{-lei_fake_mid}; + }; my $subj = $smsg->{subject}; my $xpath; if ($subj ne '') { diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm index b4cc2ecb..da8ce590 100644 --- a/lib/PublicInbox/Smsg.pm +++ b/lib/PublicInbox/Smsg.pm @@ -76,7 +76,7 @@ sub parse_references ($$$) { return $refs if scalar(@$refs) == 0; # prevent circular references here: - my %seen = ( $smsg->{mid} => 1 ); + my %seen = ( ($smsg->{mid} // '') => 1 ); my @keep; foreach my $ref (@$refs) { if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) { diff --git a/t/lei-import.t b/t/lei-import.t index 99289748..9bb4e1fa 100644 --- a/t/lei-import.t +++ b/t/lei-import.t @@ -79,6 +79,27 @@ is($res->[1], undef, 'only one result'); is($res->[0]->{'m'}, 'k@y', 'got expected message'); is_deeply($res->[0]->{kw}, ['seen'], "`seen' keywords set"); +# no From, Sender, or Message-ID +$eml_str = <<'EOM'; +Subject: draft message with no sender +References: + +No use for a name +EOM +lei_ok([qw(import -F eml -)], undef, { %$lei_opt, 0 => \$eml_str }); +lei_ok(['q', 's:draft message with no sender']); +my $draft_a = json_utf8->decode($lei_out); +ok(!exists $draft_a->[0]->{'m'}, 'no fake mid stored or exposed'); +lei_ok([qw(tag -F eml - +kw:draft)], undef, { %$lei_opt, 0 => \$eml_str }); +lei_ok(['q', 's:draft message with no sender']); +my $draft_b = json_utf8->decode($lei_out); +my $kw = delete $draft_b->[0]->{kw}; +is_deeply($kw, ['draft'], 'draft kw set'); +is_deeply($draft_a, $draft_b, 'fake Message-ID lookup') or + diag explain($draft_a, $draft_b); +lei_ok('blob', '--mail', $draft_b->[0]->{blob}); +is($lei_out, $eml_str, 'draft retrieved by blob'); + # see t/lei_to_mail.t for "import -F mbox*" }); done_testing;