1 # Copyright (C) 2019 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # I have no idea how stable or safe this is for handling untrusted
5 # input, but it seems to have been around for a while, and the
6 # highlight(1) executable is supported by gitweb and cgit.
8 # I'm also unsure about API stability, but highlight 3.x seems to
9 # have been around a few years and ikiwiki (apparently the only
10 # user of the SWIG/Perl bindings, at least in Debian) hasn't needed
11 # major changes to support it in recent years.
13 # Some code stolen from ikiwiki (GPL-2.0+)
14 # wrapper for SWIG-generated highlight.pm bindings
15 package PublicInbox::HlMod;
18 use highlight; # SWIG-generated stuff
20 sub _parse_filetypes ($) {
21 my $ft_conf = $_[0]->searchFile('filetypes.conf') or
22 die 'filetypes.conf not found by highlight';
23 open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!";
27 my @shebang; # order matters
29 # Hrm... why isn't this exposed by the highlight API?
30 # highlight >= 3.2 format (bind-style) (from ikiwiki)
31 while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
32 Extensions\s*=\s*{([^}]+)}/sgx) {
34 foreach my $bit (split(/,/, $2)) {
35 $bit =~ s/.*"(.*)".*/$1/s;
36 $ext2lang{$bit} = $lang;
39 # AFAIK, all the regexps used by in filetypes.conf distributed
40 # by highlight work as Perl REs
41 while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
42 Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) {
43 my ($lang, $re) = ($1, $2);
45 my $perl_re = qr/$re/;
46 push @shebang, [ $lang, $perl_re ];
49 warn "$lang shebang=[[$re]] did not work in Perl: $@";
52 (\%ext2lang, \@shebang);
57 my $dir = highlight::DataDir->new;
58 $dir->initSearchDirectories('');
59 my ($ext2lang, $shebang) = _parse_filetypes($dir);
62 -ext2lang => $ext2lang,
67 sub _shebang2lang ($$) {
68 my ($self, $str) = @_;
69 my $shebang = $self->{-shebang};
70 foreach my $s (@$shebang) {
71 return $s->[0] if $$str =~ $s->[1];
77 my ($self, $path) = @_;
78 my ($ext) = ($path =~ m!([^\\/\.]+)\z!);
80 $self->{-ext2lang}->{$ext} || $ext;
84 my ($self, $str, $path) = @_;
85 my $lang = _path2lang($self, $path) if defined $path;
86 my $dir = $self->{-dir};
89 $langpath = $dir->getLangPath("$lang.lang") or return;
90 $langpath = undef unless -f $langpath;
92 unless (defined $langpath) {
93 $lang = _shebang2lang($self, $str) or return;
94 $langpath = $dir->getLangPath("$lang.lang") or return;
95 $langpath = undef unless -f $langpath;
97 return unless defined $langpath;
99 my $gen = $self->{$langpath} ||= do {
100 my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
101 $g->setFragmentCode(1); # generate html fragment
102 $g->setHTMLEnclosePreTag(1); # include <pre>
104 # whatever theme works
105 my $themepath = $dir->getThemePath('print.theme');
106 $g->initTheme($themepath);
107 $g->loadLanguage($langpath);
108 $g->setEncoding('utf-8');
111 \($gen->generateString($$str))
114 # SWIG instances aren't reference-counted, but $self is;
115 # so we need to delete all the CodeGenerator instances manually
116 # at our own destruction
119 foreach my $gen (values %$self) {
120 if (ref($gen) eq 'highlight::CodeGenerator') {
121 highlight::CodeGenerator::deleteInstance($gen);