Sergey Matveev's repositories - public-inbox.git/commitdiff
address: extract more characters from email addresses
author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Thu, 15 Feb 2018 00:25:53 +0000 (00:25 +0000)
committer: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Thu, 15 Feb 2018 01:08:15 +0000 (01:08 +0000)
There are a lot of weird characters which show up in LKML archives
which we did not support before.  Furthermore, allow spaces
before the '>' in the From: line, as at least one non-spam
poster used them.

lib/PublicInbox/Address.pm
t/address.t

index f334adeac02d0b797d5d29c715a0ec49da049c48..548f417cae91e25ff5ddc6d1ee031d290129ef73 100644 (file)
@@ -8,7 +8,8 @@ use warnings;
 # just enough to make thing sanely displayable and pass to git
 
 sub emails {
-       ($_[0] =~ /([\w\.\+=\-]+\@[\w\.\-]+)>?\s*(?:\(.*?\))?(?:,\s*|\z)/g)
+       ($_[0] =~ /([\w\.\+=\?"\(\)\-!#\$%&'\*\/\^\`\|\{\}~]+\@[\w\.\-\(\)]+)
+               (?:\s[^>]*)?>?\s*(?:\(.*?\))?(?:,\s*|\z)/gx)
 }
 
 sub names {
index e35e4f8b0a1eb2d45032fb4ed37cef48802aac4b..eced5c4632cdea1e86990ac7e594e61acf75c6ab 100644 (file)
@@ -9,8 +9,9 @@ is_deeply([qw(e@example.com e@example.org)],
        [PublicInbox::Address::emails('User <e@example.com>, e@example.org')],
        'address extraction works as expected');
 
-is_deeply([PublicInbox::Address::emails('"ex@example.com" <ex@example.com>')],
-       [qw(ex@example.com)]);
+is_deeply(['user@example.com'],
+       [PublicInbox::Address::emails('<user@example.com (Comment)>')],
+       'comment after domain accepted before >');
 
 my @names = PublicInbox::Address::names(
        'User <e@e>, e@e, "John A. Doe" <j@d>, <x@x>');