From dfdaf74a2ab6d694315d8f636e3771a7a7934f3f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 23 Jun 2017 22:42:34 +0000 Subject: [PATCH] linkify: handle URLs in parenthesized statements Sometimes, URLs exist at the end of parenthesized statements, and we shouldn't unnecessarily capture that. (example: https://public-inbox.org/ruby-core/20170623032722.GA8124@dcvr/) --- lib/PublicInbox/Linkify.pm | 6 ++++-- t/linkify.t | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 8e1728c7..93c468fe 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -25,7 +25,7 @@ my $LINK_RE = qr{(\()?\b((?:ftps?|https?|nntps?|gopher):// sub new { bless {}, $_[0] } sub linkify_1 { - $_[1] =~ s!$LINK_RE! + $_[1] =~ s^$LINK_RE^ my $beg = $1 || ''; my $url = $2; my $end = ''; @@ -41,6 +41,8 @@ sub linkify_1 { } } elsif ($url =~ s/([\.,;])\z//) { $end = $1; + } elsif ($url !~ /\(/ && $url =~ s/\)\z//) { + $end = ')'; } # salt this, as this could be exploited to show @@ -51,7 +53,7 @@ sub linkify_1 { $url =~ s/&/&/g; $_[0]->{$key} = $url; $beg . 'PI-LINK-'. $key . $end; - !ge; + ^ge; $_[1]; } diff --git a/t/linkify.t b/t/linkify.t index 99acf17d..041c15c7 100644 --- a/t/linkify.t +++ b/t/linkify.t @@ -14,6 +14,16 @@ use PublicInbox::Linkify; is($s, qq($u.), 'trailing period not in URL'); } +# handle URLs in parenthesized statements +{ + my $l = PublicInbox::Linkify->new; + my $u = 'http://example.com/'; + my $s = "(see: $u)"; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + is($s, qq{(see: $u)}, 'trailing ) not in URL'); +} + { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/url-with-trailing-semicolon'; -- 2.44.0