The "\w" character class in Perl matches any word character
in the Unicode database, not just ASCII characters. So we
must be prepared for that and generate links to IDNs.
use strict;
use warnings;
use Digest::SHA qw/sha1_hex/;
+use PublicInbox::Hval qw(ascii_html);
my $SALT = rand;
my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
$end = ')';
}
+ $url = ascii_html($url); # for IDN
+
# salt this, as this could be exploited to show
# links in the HTML which don't show up in the raw mail.
my $key = sha1_hex($url . $SALT);
- # only escape ampersands, others do not match LINK_RE
- $url =~ s/&/&amp;/g;
$_[0]->{$key} = $url;
$beg . 'PI-LINK-'. $key . $end;
^ge;
'punctuation with unpaired ) OK')
}
+if ('IDN example: <ACDB98F4-178C-43C3-99C4-A1D03DD6A8F5@sb.org>') {
+ my $hc = '&#26376;';
+ my $u = "http://www.\x{6708}.example.com/";
+ my $s = $u;
+ my $l = PublicInbox::Linkify->new;
+ $s = $l->linkify_1($s);
+ $s = $l->linkify_2($s);
+ my $expect = qq{<a
+href="http://www.$hc.example.com/">http://www.$hc.example.com/</a>};
+ is($s, $expect, 'IDN message escaped properly');
+}
+
done_testing();