From: Eric Wong
Date: Sat, 2 Oct 2021 11:18:34 +0000 (+0000)
Subject: content_hash: normalize whitespace before hashing addresses
X-Git-Tag: v1.7.0~226
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=f28fdcd6d8d6ac36c7b6adf6670238426f3cc067;p=public-inbox.git

content_hash: normalize whitespace before hashing addresses

This should prevent some false duplicates. I noticed this while
implementing "lei mail-diff", but only once I had implemented the
ContentDigestDbg wrapper for mail-diff.
---

diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm
index f6ae9011..bacc9cdd 100644
--- a/lib/PublicInbox/ContentHash.pm
+++ b/lib/PublicInbox/ContentHash.pm
@@ -20,6 +20,7 @@ use Digest::SHA;
 sub digest_addr ($$$) {
 	my ($dig, $h, $v) = @_;
 	$v =~ tr/"//d;
+	$v =~ tr/\r\n\t / /s;
 	$v =~ s/@([a-z0-9\_\.\-\(\)]*([A-Z])\S*)/'@'.lc($1)/ge;
 	utf8::encode($v);
 	$dig->add("$h\0$v\0");