Sergey Matveev's repositories - public-inbox.git/blobdiff - Documentation/txt2pre
doc: HACKING: add a bit about faster testing
[public-inbox.git] / Documentation / txt2pre
index acc6ca94bb484a42ee57f5d43aaead53f81d8600..cf58bad820f3adc6d83a0d46f36add4c820e404a 100755 (executable)
 #!/usr/bin/env perl
-# Copyright (C) 2014-2015 all contributors <meta@public-inbox.org>
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Stupid script to make HTML from preformatted, utf-8 text versions,
 # only generating links for http(s).  Markdown does too much
 # and requires indentation to output preformatted text.
 use strict;
 use warnings;
-use CGI qw/escapeHTML/;
-use Encode qw/encode/;
-my $str = eval { local $/; <> };
-$str = escapeHTML($str);
-$str = encode('us-ascii', $str, Encode::HTMLCREF);
+use PublicInbox::Linkify;
+use PublicInbox::Hval qw(ascii_html);
+# Maps a manpage reference (e.g. "name(1)") to the URL it should link to.
+my %xurls;
+
+# Our own manpages: both "NAME(SECTION)" and the bare "NAME" point at
+# NAME.html
+my @local_manpages = qw[
+       public-inbox.cgi(1)
+       public-inbox-compact(1)
+       public-inbox-config(5)
+       public-inbox-convert(1)
+       public-inbox-daemon(8)
+       public-inbox-edit(1)
+       public-inbox-httpd(1)
+       public-inbox-index(1)
+       public-inbox-init(1)
+       public-inbox-learn(1)
+       public-inbox-mda(1)
+       public-inbox-nntpd(1)
+       public-inbox-overview(7)
+       public-inbox-purge(1)
+       public-inbox-v1-format(5)
+       public-inbox-v2-format(5)
+       public-inbox-watch(1)
+       public-inbox-xcpdb(1)
+];
+for my $page (@local_manpages) {
+       # leading run of word characters, dashes and dots, i.e. the
+       # reference without its "(SECTION)" suffix
+       my ($name) = ($page =~ /([\w\-\.]+)/);
+       my $href = "$name.html";
+       @xurls{$page, $name} = ($href, $href);
+}
+
+# Xapian tools: only the "NAME(1)" form is linked, to the dot-prefixed
+# relative name ".NAME.1.html"
+for my $page (qw[copydatabase(1) xapian-compact(1)]) {
+       my ($name) = ($page =~ /([\w\-\.]+)/);
+       $xurls{$page} = ".$name.1.html";
+}
+
+# Linux manpages on man7.org, filed per section as manS/NAME.S.html.
+# Linked over HTTPS, like every other external URL in this table.
+for my $page (qw[make(1) flock(2) setrlimit(2) vfork(2) tmpfs(5)]) {
+       # split "NAME(S)" into the name and its single-digit section
+       my ($name, $sect) = ($page =~ /([\w\-]+)\((\d)\)/);
+       $xurls{$page} =
+               "https://www.man7.org/linux/man-pages/man$sect/$name.$sect.html";
+}
+
+# git manpages: link to the HTML documentation tree on kernel.org
+my @git_manpages = qw[
+       git(1)
+       git-am(1)
+       git-apply(1)
+       git-config(1)
+       git-daemon(1)
+       git-fetch(1)
+       git-filter-branch(1)
+       git-format-patch(1)
+       git-gc(1)
+       git-http-backend(1)
+       git-imap-send(1)
+       git-init(1)
+       git-send-email(1)
+       gitrepository-layout(5)
+];
+for my $page (@git_manpages) {
+       # the page name is the reference without its "(SECTION)" suffix
+       my ($name) = ($page =~ /([\w\-\.]+)/);
+       $xurls{$page} =
+               "https://kernel.org/pub/software/scm/git/docs/$name.html";
+}
+
+# systemd manpages: link to the online manual on freedesktop.org
+my @systemd_manpages = qw[
+       sd_listen_fds(3)
+       systemd(1)
+       systemd.unit(5)
+       systemd.socket(5)
+];
+for my $page (@systemd_manpages) {
+       # dots are part of the name here (e.g. "systemd.unit")
+       my ($name) = ($page =~ /([\w\-\.]+)/);
+       $xurls{$page} =
+               "https://www.freedesktop.org/software/systemd/man/$name.html";
+}
+
+# One-off references whose URLs follow no common pattern
+$xurls{'spamc(1)'} =
+       'https://spamassassin.apache.org/full/3.4.x/doc/spamc.html';
+$xurls{'grok-pull'} =
+       'https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git' .
+       '/tree/man/grok-pull.1.rst';
+# the second half must start with "/", not "./": a leading dot would be
+# glued onto the repository name and break the link
+$xurls{'git-filter-repo(1)'} = 'https://github.com/newren/git-filter-repo'.
+                       '/blob/master/Documentation/git-filter-repo.txt';
+$xurls{'ssoma(1)'} = 'https://ssoma.public-inbox.org/ssoma.txt';
+$xurls{'cgitrc(5)'} = 'https://git.zx2c4.com/cgit/tree/cgitrc.5.txt';
+$xurls{'prove(1)'} = 'https://perldoc.perl.org/prove.html';
+
+# slurp all of standard input in one read
+my $str = do { local $/ = undef; <STDIN> };
 my ($title) = ($str =~ /\A([^\n]+)/);
+# When the text has a NAME section, its first line becomes the title
+# instead of the first line of the document
+if ($str =~ /^NAME\n\s+([^\n]+)/sm) {
+       $title = $1;
+       # a page should not hyperlink its own name
+       my ($self_name) = ($title =~ /([\w\.\-]+)/);
+       delete $xurls{$self_name} if defined $self_name;
+}
+$title = ascii_html($title);
+# first linkify pass runs on the raw text, before HTML-escaping
+my $l = PublicInbox::Linkify->new;
+$str = $l->linkify_1($str);
+$str = ascii_html($str);
 
-# temporarily swap &gt; for escape so our s!! to add href works.
-# there's probably a way to do this with only a single s!! ...
-$str =~ s!&gt;!\e!g;
-$str =~ s!\b((ftp|https?)://[\w+\+\&\?\.\%\;/#-]+)!<a\nhref="$1"\n>$1</a>!g;
-$str =~ s!\e!&gt;!g; # swap escapes back to &gt;
+# Turn known manpage references into links.  Longer names go first so
+# the alternation tries "NAME(SECTION)" before its bare "NAME" prefix.
+my @keys = sort { length($b) <=> length($a) } keys %xurls;
+my $xkeys = join('|', map { quotemeta } @keys);
+
+# A name is left alone when preceded by ">" or a word character, or
+# when followed by a word character, "<", "-" or ".html".  The pattern
+# has exactly one capture group, so the replacement only uses $1.
+$str =~ s,(?<![>\w_])($xkeys)(?!(?:[\w<\-]|\.html)),
+       qq(<a\nhref=").$xurls{$1}.qq(">$1</a>),sge;
+
+# second linkify pass, run after escaping and manpage linking
+$str = $l->linkify_2($str);
 
 print '<html><head>',
-  '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
+  qq(<meta\nhttp-equiv="Content-Type"\ncontent="text/html; charset=utf-8"\n/>),
   "<title>$title</title>",
-  "</head><body>\n<pre>",  $str , '</pre></body></html>';
+  "</head><body><pre>",  $str , '</pre></body></html>';
+STDOUT->flush;
+
+# keep mtime on website consistent so clients can cache
+if (-f STDIN && -f STDOUT) {
+       my @st = stat(STDIN);
+       utime($st[8], $st[9], \*STDOUT);
+}