lib/PublicInbox/Linkify.pm | 5 +++--
t/linkify.t | 12 ++++++++++++
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index d4778e7de371ecc5c3059363bb0d0ec1cc334686..84960a98889fd4c53cc68aa5fce72eac1094084f 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -13,6 +13,7 @@ package PublicInbox::Linkify;
use strict;
use warnings;
use Digest::SHA qw/sha1_hex/;
+use PublicInbox::Hval qw(ascii_html);
my $SALT = rand;
my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
@@ -61,12 +62,12 @@ } elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
$end = ')';
}
+ $url = ascii_html($url); # for IDN
+
# salt this, as this could be exploited to show
# links in the HTML which don't show up in the raw mail.
my $key = sha1_hex($url . $SALT);
- # only escape ampersands, others do not match LINK_RE
- $url =~ s/&/&amp;/g;
$_[0]->{$key} = $url;
$beg . 'PI-LINK-'. $key . $end;
^ge;
diff --git a/t/linkify.t b/t/linkify.t
index fe218b91f95c4a89e0d3eaea0e6a2d6052b39c53..c492358257525fab99487f8e53bde3e55e209896 100644
--- a/t/linkify.t
+++ b/t/linkify.t
@@ -132,4 +132,16 @@ like($s, qr!\(see <a[^>]+>http://example\.com/</a>\)\.!s,
'punctuation with unpaired ) OK')
}
+if ('IDN example: ') { # constant-true label string; the block only scopes this test
+	my $hc = '&#26376;'; # U+6708 written as a decimal numeric character reference
+	my $u = "http://www.\x{6708}.example.com/"; # URL with a non-ASCII host label
+	my $s = $u;
+	my $l = PublicInbox::Linkify->new;
+	$s = $l->linkify_1($s); # presumably the pass that runs ascii_html() on the URL
+	$s = $l->linkify_2($s); # presumably expands the placeholder into anchor markup
+	my $expect = qq{<a
+href="http://www.$hc.example.com/">http://www.$hc.example.com/</a>};
+	is($s, $expect, 'IDN message escaped properly');
+}
+
done_testing();