From b8eb1f39dc9aea6ce84373c50c47c6fc4ac8c503 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Jan 2019 07:44:28 +0000 Subject: [PATCH] mid: filter out 'y', 'n', and email addresses from references() Looking at git@vger history, several emails had broken References/In-Reply-To pointing to <y>, <n> and email addresses as Message-IDs in References and In-Reply-To headers. This was causing too many unrelated messages to be linked together in the same thread. --- lib/PublicInbox/MID.pm | 25 +++++++++++++++++++------ t/mid.t | 4 ++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index cd56f272..7f1ab15e 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -10,6 +10,7 @@ our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC mids references/; use URI::Escape qw(uri_escape_utf8); use Digest::SHA qw/sha1_hex/; +require PublicInbox::Address; use constant { MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q') @@ -79,22 +80,34 @@ sub references ($) { push(@mids, ($v =~ /<([^>]+)>/sg)); } } - uniq_mids(\@mids); + + # old versions of git-send-email would prompt users for + # In-Reply-To and users' muscle memory would use 'y' or 'n' + # as responses: + my %addr = ( y => 1, n => 1 ); + + foreach my $f (qw(To From Cc)) { + my @v = $hdr->header_raw($f); + foreach my $v (@v) { + $addr{$_} = 1 for (PublicInbox::Address::emails($v)); + } + } + uniq_mids(\@mids, \%addr); } -sub uniq_mids ($) { - my ($mids) = @_; +sub uniq_mids ($;$) { + my ($mids, $seen) = @_; my @ret; - my %seen; + $seen ||= {}; foreach my $mid (@$mids) { $mid =~ tr/\n\t\r//d; if (length($mid) > MAX_MID_SIZE) { warn "Message-ID: <$mid> too long, truncating\n"; $mid = substr($mid, 0, MAX_MID_SIZE); } - next if $seen{$mid}; + next if $seen->{$mid}; push @ret, $mid; - $seen{$mid} = 1; + $seen->{$mid} = 1; } \@ret; } diff --git a/t/mid.t 
b/t/mid.t index 8c307c82..69a8a708 100644 --- a/t/mid.t +++ b/t/mid.t @@ -36,6 +36,10 @@ is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)'); $mime->header_set('Message-ID', "<hello\tworld>"); is_deeply(mids($mime->header_obj), ['helloworld'], 'drop \t in Message-ID'); + + $mime->header_set('To', 'u@example.com'); + $mime->header_set('References', '<hello> <world> <n> <u@example.com>'); + is_deeply(references($mime->header_obj), [qw(hello world)]); } done_testing(); -- 2.44.0