X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FHlMod.pm;h=f42ece80ededc4a04f06f453170c7320aaf94391;hb=HEAD;hp=5cbfb29862ecffa6a1119ef46b7d599b5b7bb830;hpb=0a04fa7bd38c8f491b429dc7d8578735ca7ca3f4;p=public-inbox.git diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm index 5cbfb298..f42ece80 100644 --- a/lib/PublicInbox/HlMod.pm +++ b/lib/PublicInbox/HlMod.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2019 all contributors +# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # I have no idea how stable or safe this is for handling untrusted @@ -14,11 +14,13 @@ # wrapper for SWIG-generated highlight.pm bindings package PublicInbox::HlMod; use strict; -use warnings; +use v5.10.1; use highlight; # SWIG-generated stuff +use PublicInbox::Hval qw(src_escape ascii_html); +my $hl; sub _parse_filetypes ($) { - my $ft_conf = $_[0]->searchFile('filetypes.conf') or + my $ft_conf = $_[0]->getFiletypesConfPath('filetypes') or die 'filetypes.conf not found by highlight'; open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!"; local $/; @@ -52,16 +54,19 @@ sub _parse_filetypes ($) { (\%ext2lang, \@shebang); } +# We only need one instance sub new { my ($class) = @_; - my $dir = highlight::DataDir->new; - $dir->initSearchDirectories(''); - my ($ext2lang, $shebang) = _parse_filetypes($dir); - bless { - -dir => $dir, - -ext2lang => $ext2lang, - -shebang => $shebang, - }, $class; + $hl ||= do { + my $dir = highlight::DataDir->new; + $dir->initSearchDirectories(''); + my ($ext2lang, $shebang) = _parse_filetypes($dir); + bless { + -dir => $dir, + -ext2lang => $ext2lang, + -shebang => $shebang, + }, $class; + }; } sub _shebang2lang ($$) { @@ -83,44 +88,54 @@ sub _path2lang ($$) { sub do_hl { my ($self, $str, $path) = @_; my $lang = _path2lang($self, $path) if defined $path; - my $dir = $self->{-dir}; + do_hl_lang($self, $str, $lang); +} + +sub do_hl_lang { + my ($self, $str, $lang) = @_; + my $langpath; if (defined $lang) { - $langpath = $dir->getLangPath("$lang.lang") or return; - $langpath = undef unless -f $langpath; - } - unless (defined $langpath) { - $lang = _shebang2lang($self, $str) or return; - $langpath = $dir->getLangPath("$lang.lang") or return; - $langpath = undef unless -f $langpath; + $langpath = $self->{-dir}->getLangPath("$lang.lang") or return; + undef $lang unless -f $langpath; } - return unless defined $langpath; + $lang //= _shebang2lang($self, $str) // return; + $langpath = $self->{-dir}->getLangPath("$lang.lang") or return; + return unless -f $langpath; - my $gen = $self->{$langpath} ||= do { - my $g = highlight::CodeGenerator::getInstance($highlight::HTML); - $g->setFragmentCode(1); # generate html fragment - $g->setHTMLEnclosePreTag(1); # include
+	my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
+	$g->setFragmentCode(1); # generate html fragment
 
-		# whatever theme works
-		my $themepath = $dir->getThemePath('print.theme');
-		$g->initTheme($themepath);
-		$g->loadLanguage($langpath);
-		$g->setEncoding('utf-8');
-		$g;
-	};
-	\($gen->generateString($$str))
+	# whatever theme works
+	$g->initTheme($self->{-dir}->getThemePath('print.theme'));
+	$g->loadLanguage($langpath);
+	$g->setEncoding('utf-8');
+	# we assume $$str is valid UTF-8, but the SWIG binding doesn't
+	# know that, so ensure it's marked as UTF-8 even if it isnt...
+	my $out = $g->generateString($$str);
+	highlight::CodeGenerator::deleteInstance($g);
+	utf8::decode($out);
+	src_escape($out);
+	\$out;
 }
 
-# SWIG instances aren't reference-counted, but $self is;
-# so we need to delete all the CodeGenerator instances manually
-# at our own destruction
-sub DESTROY {
-	my ($self) = @_;
-	foreach my $gen (values %$self) {
-		if (ref($gen) eq 'highlight::CodeGenerator') {
-			highlight::CodeGenerator::deleteInstance($gen);
+# Highlight text, but support Markdown "```$LANG" notation
+# while preserving WYSIWYG of plain-text documentation.
+# This is NOT to be enabled by default or encouraged for parsing
+# emails, since it is NOT stable and can lead to standards
+# proliferation of email.
+sub do_hl_text {
+	my ($self, $str) = @_;
+
+	$$str = join('', map {
+		if (/\A(``` ?)(\w+)\s*?\n(.+)(^```\s*)\z/sm) {
+			my ($pfx, $lang, $code, $post) = ($1, $2, $3, $4);
+			my $hl = do_hl_lang($self, \$code, $lang) || \$code;
+			$pfx . $lang . "\n" . $$hl . $post;
+		} else {
+			ascii_html($_);
 		}
-	}
+	} split(/(^``` ?\w+\s*?\n.+?^```\s*$)/sm, $$str));
 }
 
 1;