From 3f3c294e9466a4db7e0ed53e57b37226d0715f8a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 28 Aug 2022 03:59:50 +0000 Subject: [PATCH] linkify: avoid digits and dashes in placeholders The `highlight' module seems to highlight every digit in YAML (and possibly other) source files. This causes problems in linkify_2 which replaces the placeholders with proper URIs. I suspect `-' and other punctuation characters will cause similar problems, so we must stick to [A-Za-z]. Thus transliterate 0-9 to A-J in the hex key to ensure highlight doesn't see digit characters, and rename the prefix to be project-name independent. --- lib/PublicInbox/Linkify.pm | 27 +++++++++++++-------------- t/linkify.t | 5 +++++ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 2ac74e2a..9fc3128f 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # two-step linkification. @@ -11,7 +11,7 @@ # Maybe this could be done more efficiently... package PublicInbox::Linkify; use strict; -use warnings; +use v5.10.1; use Digest::SHA qw/sha1_hex/; use PublicInbox::Hval qw(ascii_html mid_href); use PublicInbox::MID qw($MID_EXTRACT); @@ -68,23 +68,22 @@ sub linkify_1 { # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); - + $key =~ tr/0-9/A-J/; # no digits for YAML highlight $_[0]->{$key} = $url; - $beg . 'PI-LINK-'. $key . $end; + $beg . 'LINKIFY' . $key . $end; ^geo; $_[1]; } sub linkify_2 { - # Added "PI-LINK-" prefix to avoid false-positives on git commits - $_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b! + # Added "LINKIFY" prefix to avoid false-positives on git commits + $_[1] =~ s!\bLINKIFY([a-fA-J]{40})\b! my $key = $1; my $url = $_[0]->{$key}; if (defined $url) { "$url"; - } else { - # false positive or somebody tried to mess with us - $key; + } else { # false positive or somebody tried to mess with us + 'LINKIFY'.$key; } !ge; $_[1]; @@ -102,20 +101,20 @@ sub linkify_mids { # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($html . $SALT); + $key =~ tr/0-9/A-J/; my $repl = qq(<$html>); $repl .= qq{ (raw)} if $raw; $self->{$key} = $repl; - 'PI-LINK-'. $key; + 'LINKIFY'.$key; !ge; $$str = ascii_html($$str); - $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b! + $$str =~ s!\bLINKIFY([a-fA-J]{40})\b! my $key = $1; my $repl = $_[0]->{$key}; if (defined $repl) { $repl; - } else { - # false positive or somebody tried to mess with us - $key; + } else { # false positive or somebody tried to mess with us + 'LINKIFY'.$key; } !ge; } diff --git a/t/linkify.t b/t/linkify.t index e42e1efe..9280fd91 100644 --- a/t/linkify.t +++ b/t/linkify.t @@ -144,4 +144,9 @@ href="http://www.$hc.example.com/">http://www.$hc.example.com/}; is($s, $expect, 'IDN message escaped properly'); } +{ + my $false_positive = 'LINKIFY'.('A' x 40); + is(PublicInbox::Linkify->new->to_html($false_positive), + $false_positive, 'false-positive left as-is'); +} done_testing(); -- 2.44.0