use Digest::SHA qw/sha1_hex/;
# Random value generated once when this statement runs; it is appended to
# each URL before hashing (see the sha1_hex call further down).
my $SALT = rand;
# Pattern used to find URLs.  Capture group 1 is the whole URL:
# a scheme, "://", a run of host characters (word characters plus
# "@", ":", "." and "-"), a slash, an optional space and then any
# number of path/query characters.  Compiled with /x.
# The "-" lines are the previous pattern; the "+" lines additionally
# accept the ftps, nntps and gopher schemes and the characters
# "," ":" "~" and "$" in the path/query part.
-my $LINK_RE = qr!\b((?:ftp|https?|nntp)://
+my $LINK_RE = qr!\b((?:ftps?|https?|nntps?|gopher)://
[\@:\w\.-]+/
- ?[\@\w\+\&\?\.\%\;/#=-]*)!x;
+ ?[,:~\$\@\w\+\&\?\.\%\;/#=-]*)!x;
# Constructor: returns a new, empty hash reference blessed into the
# class (first argument) it was invoked on.
sub new {
	my ($class) = @_;
	return bless {}, $class;
}
# Replaces every match of $LINK_RE in $s with a 'PI-LINK-<sha1 hex>'
# placeholder and stores the HTML-escaped URL in $self under that hex key.
# Trailing '.', ',' or ';' is treated as sentence punctuation: it is
# removed from the URL and re-emitted after the placeholder.
# Returns the rewritten string.
my ($self, $s) = @_;
$s =~ s!$LINK_RE!
my $url = $1;
+ my $end = '';
+
+ # it's fairly common to end URLs in messages with
+ # '.', ',' or ';' to denote the end of a statement;
+ # assume the intent was to end the statement/sentence
+ # in English
+ if ($url =~ s/([\.,;])\z//) {
+ $end = $1;
+ }
+
# salt this, as this could be exploited to show
# links in the HTML which don't show up in the raw mail.
my $key = sha1_hex($url . $SALT);
# only escape ampersands, others do not match LINK_RE
# (the replacement has to be the entity; a bare ampersand here
# would turn this substitution into a no-op)
$url =~ s/&/&amp;/g;
$self->{$key} = $url;
- 'PI-LINK-'. $key;
+ 'PI-LINK-'. $key . $end;
!ge;
$s;
}