From e55bef366b87ecbcb66c93669f41876afc1d2446 Mon Sep 17 00:00:00 2001
From: Eric Wong <e@80x24.org>
Date: Thu, 18 Apr 2019 23:49:42 +0000
Subject: [PATCH] linkify: require parentheses pairs in URLs

A dangling parenthesis followed by trailing punctuation usually
means the parenthesis is not intended as part of the URL.
---
 lib/PublicInbox/Linkify.pm | 12 ++++++++++--
 t/linkify.t                | 15 +++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 71193bc2..d4778e7d 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -47,8 +47,16 @@ sub linkify_1 {
 			if ($url =~ s/$re//) {
 				$end = $1;
 			}
-		} elsif ($url =~ s/([\.,;])\z//) {
-			$end = $1;
+		} elsif ($url =~ s/(\))?([\.,;])\z//) {
+			$end = $2;
+			# require ')' to be paired with '('
+			if (defined $1) { # ')'
+				if (index($url, '(') < 0) {
+					$end = ")$end";
+				} else {
+					$url .= ')';
+				}
+			}
 		} elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
 			$end = ')';
 		}
diff --git a/t/linkify.t b/t/linkify.t
index bef4ffd6..fe218b91 100644
--- a/t/linkify.t
+++ b/t/linkify.t
@@ -50,6 +50,11 @@ use PublicInbox::Linkify;
 	$s = $l->linkify_2($s);
 	is($s, qq(hello <a\nhref="$u">$u</a> world), 'URL preserved');
 
+	$s = "$u. hi";
+	$s = $l->linkify_1($s);
+	$s = $l->linkify_2($s);
+	is($s, qq(<a\nhref="$u">$u</a>. hi), 'paired () in URL OK');
+
 	$u .= "?query=a";
 	$s = "hello $u world";
 	$s = $l->linkify_1($s);
@@ -117,4 +122,14 @@ use PublicInbox::Linkify;
 	}
 }
 
+# dangling ')', cf. MaintNotes in git.git todo branch
+{
+	my $l = PublicInbox::Linkify->new;
+	my $s = '(see http://example.com/).';
+	$s = $l->linkify_1($s);
+	$s = $l->linkify_2($s);
+	like($s, qr!\(see <a[^>]+>http://example\.com/</a>\)\.!s,
+		'punctuation with unpaired ) OK')
+}
+
 done_testing();
-- 
2.50.0