X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLinkify.pm;h=9fc3128fdc2d6842d5f021da71cb7c6423277e31;hb=3f3c294e9466a4db7e0ed53e57b37226d0715f8a;hp=5b83742c14b36ace1e62790b20d417e0971dbc87;hpb=2394cb0bdc671605729b5a4c578ef4cd3b9813fd;p=public-inbox.git diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 5b83742c..9fc3128f 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2019 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # two-step linkification. @@ -11,12 +11,13 @@ # Maybe this could be done more efficiently... package PublicInbox::Linkify; use strict; -use warnings; +use v5.10.1; use Digest::SHA qw/sha1_hex/; -use PublicInbox::Hval qw(ascii_html); +use PublicInbox::Hval qw(ascii_html mid_href); +use PublicInbox::MID qw($MID_EXTRACT); my $SALT = rand; -my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher):// +my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher):// [\@:\w\.-]+(?:/ (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*) (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)? @@ -67,23 +68,22 @@ sub linkify_1 { # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); - + $key =~ tr/0-9/A-J/; # no digits for YAML highlight $_[0]->{$key} = $url; - $beg . 'PI-LINK-'. $key . $end; - ^ge; + $beg . 'LINKIFY' . $key . $end; + ^geo; $_[1]; } sub linkify_2 { - # Added "PI-LINK-" prefix to avoid false-positives on git commits - $_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b! + # Added "LINKIFY" prefix to avoid false-positives on git commits + $_[1] =~ s!\bLINKIFY([a-fA-J]{40})\b! my $key = $1; my $url = $_[0]->{$key}; if (defined $url) { "$url"; - } else { - # false positive or somebody tried to mess with us - $key; + } else { # false positive or somebody tried to mess with us + 'LINKIFY'.$key; } !ge; $_[1]; @@ -92,30 +92,33 @@ sub linkify_2 { # single pass linkification of within $str # with $pfx being the URL prefix sub linkify_mids { - my ($self, $pfx, $str) = @_; - $$str =~ s!<([^>]+)>! - my $msgid = PublicInbox::Hval->new_msgid($1); - my $html = $msgid->as_html; - my $href = $msgid->{href}; - $href = ascii_html($href); # for IDN + my ($self, $pfx, $str, $raw) = @_; + $$str =~ s!$MID_EXTRACT! + my $mid = $1; + my $html = ascii_html($mid); + my $href = mid_href($mid); # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($html . $SALT); - $self->{$key} = [ $href, $html ]; - ''; + $key =~ tr/0-9/A-J/; + my $repl = qq(<$html>); + $repl .= qq{ (raw)} if $raw; + $self->{$key} = $repl; + 'LINKIFY'.$key; !ge; $$str = ascii_html($$str); - $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b! + $$str =~ s!\bLINKIFY([a-fA-J]{40})\b! my $key = $1; my $repl = $_[0]->{$key}; if (defined $repl) { - "[0]/\">$repl->[1]"; - } else { - # false positive or somebody tried to mess with us - $key; + $repl; + } else { # false positive or somebody tried to mess with us + 'LINKIFY'.$key; } !ge; } +sub to_html { linkify_2($_[0], ascii_html(linkify_1(@_))) } + 1;