From: Eric Wong (Contractor, The Linux Foundation) Date: Thu, 15 Feb 2018 00:25:53 +0000 (+0000) Subject: address: extract more characters from email addresses X-Git-Tag: v1.1.0-pre1~241 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=f46019039da6ac1596a4aef64b7bf394c743c1b1 address: extract more characters from email addresses There are a lot of weird characters which show up in LKML archives which we did not support before. Furthermore, allow spaces before the '>' in the From: line as at least one non-spam poster used it. --- diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm index f334adea..548f417c 100644 --- a/lib/PublicInbox/Address.pm +++ b/lib/PublicInbox/Address.pm @@ -8,7 +8,8 @@ use warnings; # just enough to make thing sanely displayable and pass to git sub emails { - ($_[0] =~ /([\w\.\+=\-]+\@[\w\.\-]+)>?\s*(?:\(.*?\))?(?:,\s*|\z)/g) + ($_[0] =~ /([\w\.\+=\?"\(\)\-!#\$%&'\*\/\^\`\|\{\}~]+\@[\w\.\-\(\)]+) + (?:\s[^>]*)?>?\s*(?:\(.*?\))?(?:,\s*|\z)/gx) } sub names { diff --git a/t/address.t b/t/address.t index e35e4f8b..eced5c46 100644 --- a/t/address.t +++ b/t/address.t @@ -9,8 +9,9 @@ is_deeply([qw(e@example.com e@example.org)], [PublicInbox::Address::emails('User <e@example.com>, e@example.org')], 'address extraction works as expected'); -is_deeply([PublicInbox::Address::emails('"ex@example.com" <ex@example.com>')], - [qw(ex@example.com)]); +is_deeply(['user@example.com'], + [PublicInbox::Address::emails('<user@example.com (Comment)>')], + 'comment after domain accepted before >'); my @names = PublicInbox::Address::names( 'User , e@e, "John A. Doe" , ');