X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FHlMod.pm;h=f42ece80ededc4a04f06f453170c7320aaf94391;hb=HEAD;hp=237ffaca0a23723381a28e1cc2d1f7c4aa59e464;hpb=fd8e62c1ed9b9f7eb851a263a9ada2cb8c182193;p=public-inbox.git diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm index 237ffaca..f42ece80 100644 --- a/lib/PublicInbox/HlMod.pm +++ b/lib/PublicInbox/HlMod.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2019 all contributors +# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # I have no idea how stable or safe this is for handling untrusted @@ -14,11 +14,13 @@ # wrapper for SWIG-generated highlight.pm bindings package PublicInbox::HlMod; use strict; -use warnings; +use v5.10.1; use highlight; # SWIG-generated stuff +use PublicInbox::Hval qw(src_escape ascii_html); +my $hl; sub _parse_filetypes ($) { - my $ft_conf = $_[0]->searchFile('filetypes.conf') or + my $ft_conf = $_[0]->getFiletypesConfPath('filetypes') or die 'filetypes.conf not found by highlight'; open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!"; local $/; @@ -52,16 +54,19 @@ sub _parse_filetypes ($) { (\%ext2lang, \@shebang); } +# We only need one instance sub new { my ($class) = @_; - my $dir = highlight::DataDir->new; - $dir->initSearchDirectories(''); - my ($ext2lang, $shebang) = _parse_filetypes($dir); - bless { - -dir => $dir, - -ext2lang => $ext2lang, - -shebang => $shebang, - }, $class; + $hl ||= do { + my $dir = highlight::DataDir->new; + $dir->initSearchDirectories(''); + my ($ext2lang, $shebang) = _parse_filetypes($dir); + bless { + -dir => $dir, + -ext2lang => $ext2lang, + -shebang => $shebang, + }, $class; + }; } sub _shebang2lang ($$) { @@ -83,43 +88,54 @@ sub _path2lang ($$) { sub do_hl { my ($self, $str, $path) = @_; my $lang = _path2lang($self, $path) if defined $path; - my $dir = $self->{-dir}; + do_hl_lang($self, $str, $lang); +} + +sub do_hl_lang { + my ($self, $str, $lang) = @_; + my $langpath; if (defined $lang) { - $langpath = $dir->getLangPath("$lang.lang") or return; - $langpath = undef unless -f $langpath; - } - unless (defined $langpath) { - $lang = _shebang2lang($self, $str) or return; - $langpath = $dir->getLangPath("$lang.lang") or return; - $langpath = undef unless -f $langpath; + $langpath = $self->{-dir}->getLangPath("$lang.lang") or return; + undef $lang unless -f $langpath; } - return unless defined $langpath; + $lang //= _shebang2lang($self, $str) // return; + $langpath = $self->{-dir}->getLangPath("$lang.lang") or return; + return unless -f $langpath; - my $gen = $self->{$langpath} ||= do { - my $g = highlight::CodeGenerator::getInstance($highlight::HTML); - $g->setFragmentCode(1); # generate html fragment + my $g = highlight::CodeGenerator::getInstance($highlight::HTML); + $g->setFragmentCode(1); # generate html fragment - # whatever theme works - my $themepath = $dir->getThemePath('print.theme'); - $g->initTheme($themepath); - $g->loadLanguage($langpath); - $g->setEncoding('utf-8'); - $g; - }; - \($gen->generateString($$str)) + # whatever theme works + $g->initTheme($self->{-dir}->getThemePath('print.theme')); + $g->loadLanguage($langpath); + $g->setEncoding('utf-8'); + # we assume $$str is valid UTF-8, but the SWIG binding doesn't + # know that, so ensure it's marked as UTF-8 even if it isnt... + my $out = $g->generateString($$str); + highlight::CodeGenerator::deleteInstance($g); + utf8::decode($out); + src_escape($out); + \$out; } -# SWIG instances aren't reference-counted, but $self is; -# so we need to delete all the CodeGenerator instances manually -# at our own destruction -sub DESTROY { - my ($self) = @_; - foreach my $gen (values %$self) { - if (ref($gen) eq 'highlight::CodeGenerator') { - highlight::CodeGenerator::deleteInstance($gen); +# Highlight text, but support Markdown "```$LANG" notation +# while preserving WYSIWYG of plain-text documentation. +# This is NOT to be enabled by default or encouraged for parsing +# emails, since it is NOT stable and can lead to standards +# proliferation of email. +sub do_hl_text { + my ($self, $str) = @_; + + $$str = join('', map { + if (/\A(``` ?)(\w+)\s*?\n(.+)(^```\s*)\z/sm) { + my ($pfx, $lang, $code, $post) = ($1, $2, $3, $4); + my $hl = do_hl_lang($self, \$code, $lang) || \$code; + $pfx . $lang . "\n" . $$hl . $post; + } else { + ascii_html($_); } - } + } split(/(^``` ?\w+\s*?\n.+?^```\s*$)/sm, $$str)); } 1;