X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLinkify.pm;h=af9be3ff1632be1d8f7ede077f982c55501a782e;hb=a8ae7e31ac36bcda04bf13a1b834207a89b0014c;hp=71193bc28732f28ca715323b04d31e53467ab189;hpb=348f6b2b271d3562509420070bd94d6ac00619dc;p=public-inbox.git diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 71193bc2..af9be3ff 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2018 all contributors +# Copyright (C) 2014-2019 all contributors # License: AGPL-3.0+ # two-step linkification. @@ -13,6 +13,7 @@ package PublicInbox::Linkify; use strict; use warnings; use Digest::SHA qw/sha1_hex/; +use PublicInbox::Hval qw(ascii_html); my $SALT = rand; my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher):// @@ -47,18 +48,26 @@ sub linkify_1 { if ($url =~ s/$re//) { $end = $1; } - } elsif ($url =~ s/([\.,;])\z//) { - $end = $1; + } elsif ($url =~ s/(\))?([\.,;])\z//) { + $end = $2; + # require ')' to be paired with '(' + if (defined $1) { # ')' + if (index($url, '(') < 0) { + $end = ")$end"; + } else { + $url .= ')'; + } + } } elsif ($url !~ /\(/ && $url =~ s/\)\z//) { $end = ')'; } + $url = ascii_html($url); # for IDN + # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); - # only escape ampersands, others do not match LINK_RE - $url =~ s/&/&/g; $_[0]->{$key} = $url; $beg . 'PI-LINK-'. $key . $end; ^ge; @@ -80,4 +89,35 @@ sub linkify_2 { $_[1]; } +# single pass linkification of within $str +# with $pfx being the URL prefix +sub linkify_mids { + my ($self, $pfx, $str, $raw) = @_; + $$str =~ s!<([^>]+)>! + my $msgid = PublicInbox::Hval->new_msgid($1); + my $html = $msgid->as_html; + my $href = $msgid->{href}; + $href = ascii_html($href); # for IDN + + # salt this, as this could be exploited to show + # links in the HTML which don't show up in the raw mail. + my $key = sha1_hex($html . $SALT); + my $repl = qq(<$html>); + $repl .= qq{ (raw)} if $raw; + $self->{$key} = $repl; + 'PI-LINK-'. $key; + !ge; + $$str = ascii_html($$str); + $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b! + my $key = $1; + my $repl = $_[0]->{$key}; + if (defined $repl) { + $repl; + } else { + # false positive or somebody tried to mess with us + $key; + } + !ge; +} + 1;