From: Eric Wong Date: Sat, 30 Jan 2021 05:41:09 +0000 (-0600) Subject: content_hash: skip Sender for cross posted messages X-Git-Tag: v1.7.0~1272 X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=39a85f0d21e7effa94109ca2dac292e110345a15;p=public-inbox.git content_hash: skip Sender for cross posted messages This fixes a regression that was introduced long ago and restores the behavior originally specified in the comments. It makes a noticeable improvement with search results using -extindex ("all") and lei results with multiple inboxes. Update some style bits at the top of the test case while we're at it. Fixes: f0ef0a56a8957d6f ("v2: improve deduplication checks") --- diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm index 838fdd6f..4dbe7b50 100644 --- a/lib/PublicInbox/ContentHash.pm +++ b/lib/PublicInbox/ContentHash.pm @@ -68,10 +68,9 @@ sub content_digest ($) { # Only use Sender: if From is not present foreach my $h (qw(From Sender)) { - my @v = $eml->header($h); - if (@v) { - digest_addr($dig, $h, $_) foreach @v; - } + my @v = $eml->header($h) or next; + digest_addr($dig, $h, $_) foreach @v; + last; } foreach my $h (qw(Subject Date)) { my @v = $eml->header($h); diff --git a/t/content_hash.t b/t/content_hash.t index 3f02b1b3..060665f6 100644 --- a/t/content_hash.t +++ b/t/content_hash.t @@ -1,7 +1,8 @@ +#!perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; -use warnings; +use v5.10.1; use Test::More; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::Eml; @@ -19,6 +20,17 @@ EOF my $orig = content_hash($mime); my $reload = content_hash(PublicInbox::Eml->new($mime->as_string)); is($orig, $reload, 'content_hash matches after serialization'); +{ + my $s1 = PublicInbox::Eml->new($mime->as_string); + $s1->header_set('Sender', 's@example.com'); + is(content_hash($s1), $orig, "Sender ignored when 'From' present"); + my $s2 = PublicInbox::Eml->new($s1->as_string); + $s1->header_set('Sender', 
'sender@example.com'); + is(content_hash($s2), $orig, "Sender really ignored 'From'"); + $_->header_set('From') for ($s1, $s2); + isnt(content_hash($s1), content_hash($s2), + 'sender accounted when From missing'); +} foreach my $h (qw(From To Cc)) { my $n = q("Quoted N'Ame" );