lib/PublicInbox/Linkify.pm | 5 +++-- t/linkify.t | 12 ++++++++++++ diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index d4778e7de371ecc5c3059363bb0d0ec1cc334686..84960a98889fd4c53cc68aa5fce72eac1094084f 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -13,6 +13,7 @@ package PublicInbox::Linkify; use strict; use warnings; use Digest::SHA qw/sha1_hex/; +use PublicInbox::Hval qw(ascii_html); my $SALT = rand; my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher):// @@ -61,12 +62,12 @@ } elsif ($url !~ /\(/ && $url =~ s/\)\z//) { $end = ')'; } + $url = ascii_html($url); # for IDN + # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); - # only escape ampersands, others do not match LINK_RE - $url =~ s/&/&amp;/g; $_[0]->{$key} = $url; $beg . 'PI-LINK-'. $key . $end; ^ge; diff --git a/t/linkify.t b/t/linkify.t index fe218b91f95c4a89e0d3eaea0e6a2d6052b39c53..c492358257525fab99487f8e53bde3e55e209896 100644 --- a/t/linkify.t +++ b/t/linkify.t @@ -132,4 +132,16 @@ like($s, qr!\(see <a[^>]+>http://example\.com/</a>\)\.!s, 'punctuation with unpaired ) OK') } +if ('IDN example: ') { + my $hc = '&#26376;'; + my $u = "http://www.\x{6708}.example.com/"; + my $s = $u; + my $l = PublicInbox::Linkify->new; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + my $expect = qq{<a href="http://www.$hc.example.com/">http://www.$hc.example.com/</a>}; + is($s, $expect, 'IDN message escaped properly'); +} + done_testing();