Sergey Matveev's repositories - public-inbox.git/commitdiff
content_hash: normalize whitespace before hashing addresses
author: Eric Wong <e@80x24.org>
Sat, 2 Oct 2021 11:18:34 +0000 (11:18 +0000)
committer: Eric Wong <e@80x24.org>
Sat, 2 Oct 2021 20:09:38 +0000 (20:09 +0000)
This should prevent some false duplicates.  I ran into this
while implementing "lei mail-diff", but only noticed it once
I had implemented the ContentDigestDbg wrapper for mail-diff.

lib/PublicInbox/ContentHash.pm

index f6ae9011c1bf2d471e239550fd7347c85adbeb3f..bacc9cdda12498abbb0ada5d2a2e2faec10190f2 100644 (file)
@@ -20,6 +20,7 @@ use Digest::SHA;
 sub digest_addr ($$$) {
        my ($dig, $h, $v) = @_;
        $v =~ tr/"//d;
+       $v =~ tr/\r\n\t / /s;
        $v =~ s/@([a-z0-9\_\.\-\(\)]*([A-Z])\S*)/'@'.lc($1)/ge;
        utf8::encode($v);
        $dig->add("$h\0$v\0");