1 # Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # used for displaying help texts and other non-mail content
5 package PublicInbox::WwwText;
8 use PublicInbox::Linkify;
9 use PublicInbox::WwwStream;
10 use PublicInbox::Hval qw(ascii_html prurl);
11 use URI::Escape qw(uri_escape_utf8);
12 use PublicInbox::GzipFilter qw(gzf_maybe);
13 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
14 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
16 require PublicInbox::HlMod;
17 PublicInbox::HlMod->new
20 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
21 # _default_text handles the keys "help", "mirror", "color" and "config"
# An undefined key falls back to "help".
26 $key //= 'help'; # this 302s to _/text/help/
28 # get the raw text the same way we get mboxrds
# A trailing "/raw" is stripped from $key; $raw is true when it was present.
29 my $raw = ($key =~ s!/raw\z!!);
# For non-raw requests a single trailing slash is stripped as well and
# remembered so the redirect below can enforce it.
# NOTE(review): perlsyn documents `my $x = ... if COND' as undefined
# behavior; consider declaring $have_tslash on its own line -- TODO confirm.
30 my $have_tslash = ($key =~ s!/\z!!) if !$raw;
# Flat header list; slot 3 (the Content-Length value) is filled in later,
# once the length of the body is known.
33 my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
# A false return from _default_text is answered with the 404 text.
34 if (!_default_text($ctx, $key, $hdr, \$txt)) {
36 $txt = "404 Not Found ($key)\n";
38 my $env = $ctx->{env};
# The text is passed through the object returned by gzf_maybe (presumably
# optional gzip compression -- verify in PublicInbox::GzipFilter) and the
# resulting length is stored in the Content-Length slot of $hdr.
41 my $gzf = gzf_maybe($hdr, $env);
42 $txt = $gzf->translate($txt);
45 $hdr->[3] = length($txt);
46 return [ $code, $hdr, [ $txt ] ]
49 # enforce trailing slash for "wget -r" compatibility
50 if (!$have_tslash && $code == 200) {
51 my $url = $ctx->{ibx}->base_url($env);
52 $url .= "_/text/$key/";
54 return [ 302, [ 'Content-Type', 'text/plain',
56 [ "Redirecting to $url\n" ] ];
59 # Follow git commit message conventions,
60 # first line is the Subject/title
61 my ($title) = ($txt =~ /\A([^\n]*)/s);
62 $ctx->{-title_html} = ascii_html($title);
# -upfx is three "../" components plus one more for every slash in $key
# (presumably the relative path back to the inbox root -- TODO confirm).
63 my $nslash = ($key =~ tr!/!/!);
64 $ctx->{-upfx} = '../../../' . ('../' x $nslash);
65 my $l = PublicInbox::Linkify->new;
68 $hl->do_hl_text(\$txt);
70 $txt = ascii_html($txt);
# Wrap the result of the second Linkify pass in <pre> and hand it to
# html_oneshot together with the status code.
72 $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
73 PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt);
# Format the search-prefix help of $srch as an aligned text table.
#   $srch - object whose help() returns an array ref that is walked two
#           elements at a time: a prefix followed by its description
#   $txt  - presumably a scalar ref to the output text, as in the other
#           helpers of this file -- TODO confirm
76 sub _srch_prefix ($$) {
77 my ($srch, $txt) = @_;
80 my $help = $srch->help;
82 for ($i = 0; $i < @$help; $i += 2) {
83 my $pfx = $help->[$i];
# $pad tracks the largest $n seen so far.
85 $pad = $n if $n > $pad;
87 $htxt .= $help->[$i + 1];
# Indent every line of $htxt by $padding, then rewrite each line that
# starts with "$padding<prefix>\0" so the prefix is followed by enough
# spaces to reach $pad columns.
91 my $padding = ' ' x ($pad + 8);
92 $htxt =~ s/^/$padding/gms;
93 $htxt =~ s/^$padding(\S+)\0/" $1".
94 (' ' x ($pad - length($1)))/egms;
# Drop every form feed that directly precedes a newline.
95 $htxt =~ s/\f\n/\n/gs;
# Append the color customization help to $$txt: a title line naming the
# inbox base URL, an explanation pointing at ${base_url}userContent.css,
# and the output of PublicInbox::UserContent::sample followed by a
# closing ``` fence line.
#   $ctx - request context; {ibx} and {env} are read
#   $txt - scalar ref the text is appended to
# NOTE(review): none of the use/require lines at the top of this file
# load PublicInbox::UserContent -- confirm it is loaded elsewhere.
100 sub _colors_help ($$) {
101 my ($ctx, $txt) = @_;
102 my $ibx = $ctx->{ibx};
103 my $env = $ctx->{env};
104 my $base_url = $ibx->base_url($env);
105 $$txt .= "color customization for $base_url\n";
108 public-inbox provides a stable set of CSS classes for users to
109 customize colors for highlighting diffs and code.
111 Users of browsers such as dillo, Firefox, or some browser
112 extensions may start by downloading the following sample CSS file
113 to control the colors they see:
115 ${base_url}userContent.css
121 $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
124 # git-config section names are quoted in the config file, so escape them
# Double every backslash in $name.
127 $name =~ s/\\/\\\\/g;
# Append the inbox's `coderepo' settings to $$txt, followed by an example
# [coderepo "NAME"] section for every entry.
#   $ctx - request context; $ctx->{ibx}->{coderepo} and
#          $ctx->{www}->{pi_cfg} are read
#   $txt - scalar ref the text is appended to
# Returns immediately (undef / empty list) when the inbox has no
# `coderepo' entry.
# NOTE(review): the emitted help text below reads "entries allows";
# correcting it would change program output, so it is left untouched.
132 sub _coderepo_config ($$) {
133 my ($ctx, $txt) = @_;
134 my $cr = $ctx->{ibx}->{coderepo} // return;
135 # note: this doesn't preserve cgitrc layout, since we parse cgitrc
136 # and drop the original structure
137 $$txt .= "\tcoderepo = $_\n" for @$cr;
140 ; `coderepo' entries allows blob reconstruction via patch emails if
141 ; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@'
142 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
143 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
145 my $pi_cfg = $ctx->{www}->{pi_cfg};
146 for my $cr_name (@$cr) {
# The config lookup and the example path both use the unescaped name.
147 my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
148 my $path = "/path/to/$cr_name";
# NOTE(review): foreach aliases $cr_name to the elements of @$cr, so this
# assignment writes the escaped name back into $ctx->{ibx}->{coderepo};
# confirm that modifying the inbox object here is intended.
149 $cr_name = dq_escape($cr_name);
151 $$txt .= qq([coderepo "$cr_name"]\n);
152 if ($urls && scalar(@$urls)) {
154 $$txt .= join(" ||\n\t;\t", map {;
# NOTE(review): the character class is not negated, so this test is true
# only when $path contains none of the listed characters; $path always
# starts with "/path/to/", so the quoting can never trigger.  Presumably
# [^a-z0-9_/\.\-] was intended -- TODO confirm.
156 if ($path !~ m![a-z0-9_/\.\-]!i) {
157 $dst = '"'.dq_escape($dst).'"';
159 qq(git clone $_ $dst);
163 $$txt .= "\tdir = $path\n";
164 $$txt .= "\tcgiturl = https://example.com/";
# Percent-encode everything except ASCII alphanumerics, "-", ".", "_",
# "~" and "/".  The name has already been through dq_escape at this point.
165 $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
169 # n.b. this is a perfect candidate for memoization
# Append an example [publicinbox "NAME"] config section for mirroring the
# current inbox to $$txt.
#   $ctx - request context; {ibx} and {env} are read
#   $hdr - response header array ref; a Content-Disposition pair naming
#          inbox.config is pushed onto it
#   $txt - scalar ref the text is appended to
170 sub inbox_config ($$$) {
171 my ($ctx, $hdr, $txt) = @_;
172 my $ibx = $ctx->{ibx};
173 push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
# The section name is escaped for use inside double quotes; the paths in
# the snippet are placeholders, not the real inbox location.
174 my $name = dq_escape($ibx->{name});
175 my $inboxdir = '/path/to/top-level-inbox';
176 my $base_url = $ibx->base_url($ctx->{env});
178 ; Example public-inbox config snippet for a mirror of
180 ; See public-inbox-config(5) manpage for more details:
181 ; https://public-inbox.org/public-inbox-config.html
182 [publicinbox "$name"]
184 ; note: public-inbox before v1.2.0 used `mainrepo' instead of
185 ; `inboxdir', both remain supported after 1.2
187 url = https://example.com/$name/
188 url = http://example.onion/$name/
190 for my $k (qw(address listid infourl watchheader)) {
# These keys are dereferenced as array refs: one output line per value.
# NOTE(review): extindex_config prints `infourl' as a plain scalar and
# _mirror_help checks ref() before dereferencing `address' -- confirm
# which shape these fields really have.
191 defined(my $v = $ibx->{$k}) or next;
192 $$txt .= "\t$k = $_\n" for @$v;
194 if (my $altid = $ibx->{altid}) {
195 my $altid_map = $ibx->altid_map;
197 ; altid DBs may be used to provide numeric article ID lookup from
198 ; old, pre-existing sources. You can recreate them via curl(1),
199 ; gzip(1), and sqlite3(1) as documented:
201 for (sort keys %$altid_map) {
# For every altid name: a commented-out shell pipeline that downloads and
# unpacks the SQL dump, followed by the matching `altid' setting.
202 $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
203 "\t;\tgzip -dc | \\\n" .
204 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
205 $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
209 for my $k (qw(filter newsgroup obfuscate replyto)) {
# These keys are printed as single scalar values.
210 defined(my $v = $ibx->{$k}) or next;
211 $$txt .= "\t$k = $v\n";
# One line per NNTP / IMAP URL reported by the inbox, then the coderepo
# sections.
213 $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
214 $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
215 _coderepo_config($ctx, $txt);
219 # n.b. this is a perfect candidate for memoization
# Append an example extindex config snippet for the current external
# index to $$txt.
#   $ctx - request context; {ibx} and {env} are read
#   $hdr - response header array ref; a Content-Disposition pair naming
#          extindex.config is pushed onto it
#   $txt - scalar ref the text is appended to
# NOTE(review): the emitted text below reads "(5)manpage" without the
# space that inbox_config has; fixing it would change program output, so
# it is left untouched here.
220 sub extindex_config ($$$) {
221 my ($ctx, $hdr, $txt) = @_;
222 my $ibx = $ctx->{ibx};
223 push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
224 my $name = dq_escape($ibx->{name});
225 my $base_url = $ibx->base_url($ctx->{env});
227 ; Example public-inbox config snippet for the external index (extindex) at:
229 ; See public-inbox-config(5)manpage for more details:
230 ; https://public-inbox.org/public-inbox-config.html
232 topdir = /path/to/extindex-topdir
233 url = https://example.com/$name/
234 url = http://example.onion/$name/
236 for my $k (qw(infourl)) {
# NOTE(review): `infourl' is interpolated as a scalar here, whereas
# inbox_config dereferences the same key as an array ref -- confirm the
# shape stored on the extindex object.
237 defined(my $v = $ibx->{$k}) or next;
238 $$txt .= "\t$k = $v\n";
240 _coderepo_config($ctx, $txt);
# Build the plain-text list of code repositories associated with the
# current inbox or external index.
#   $ctx     - request context; {ibx}, {www}->{pi_cfg} and {env} are read
#   $top_url - presumably prepended to relative cgiturl values, per the
#              comment inside the loop -- TODO confirm
# Returns an empty list when there is no `coderepo' entry; otherwise the
# text is accumulated in $ret[0].  Every value of coderepo.NAME.cgiturl
# is passed through prurl; the "(no URL configured)" line names the
# entry as "NAME.git".
244 sub coderepos_raw ($$) {
245 my ($ctx, $top_url) = @_;
246 my $cr = $ctx->{ibx}->{coderepo} // return ();
247 my $cfg = $ctx->{www}->{pi_cfg};
249 for my $cr_name (@$cr) {
251 my $thing = $ctx->{ibx}->can('cloneurl') ?
252 'public inbox' : 'external index';
254 Code repositories for project(s) associated with this $thing
257 my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl");
260 # relative or absolute URL?, prefix relative
261 # "foo.git" with appropriate number of "../"
262 my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ :
264 $ret[0] .= "\n\t" . prurl($ctx->{env}, $u);
267 $ret[0] .= qq[\n\t$cr_name.git (no URL configured)];
270 @ret; # may be empty, this sub is called as an arg for join()
# Append mirroring instructions to $$txt.
#   $ctx - request context; {ibx}, {env} and {www}->{pi_cfg} are read
#   $txt - scalar ref the text is appended to
# An object that can('cloneurl') is treated as a PublicInbox::Inbox and
# gets "git clone --mirror" commands; anything else is treated as an
# external index (PublicInbox::ExtSearch), for which the wwwListing
# setting is looked up under its lower-cased config key.  IMAP and NNTP
# URLs, a Tor note and the code repository list follow.
273 sub _mirror_help ($$) {
274 my ($ctx, $txt) = @_;
275 my $ibx = $ctx->{ibx};
276 my $base_url = $ibx->base_url($ctx->{env});
277 chop $base_url; # no trailing slash for "git clone"
# $dir is the last path component of the base URL.
278 my $dir = (split(m!/!, $base_url))[-1];
279 my %seen = ($base_url => 1);
# $top_url is the base URL with its last path component removed; the
# trailing slash is kept.
280 my $top_url = $base_url;
281 $top_url =~ s!/[^/]+\z!/!;
282 $$txt .= "public-inbox mirroring instructions\n\n";
283 if ($ibx->can('cloneurl')) { # PublicInbox::Inbox
285 "This public inbox may be cloned and mirrored by anyone:\n";
287 my $max = $ibx->max_git_epoch;
288 # TODO: some of these URLs may be too long and we may need to
289 # do something like code_footer() above, but these are local
291 if (defined($max)) { # v2
292 for my $i (0..$max) {
293 # old epochs may be deleted:
294 -d "$ibx->{inboxdir}/git/$i.git" or next;
295 my $url = "$base_url/$i";
297 push @urls, "$url $dir/git/$i.git";
299 my $nr = scalar(@urls);
302 $$txt .= "# this inbox consists of $nr epochs:";
303 $urls[0] .= " # oldest";
304 $urls[-1] .= " # newest";
307 push @urls, $base_url;
309 # FIXME: epoch splits can be different in other repositories,
310 # use the "cloneurl" file as-is for now:
311 for my $u (@{$ibx->cloneurl}) {
# One "git clone --mirror" line per collected URL.
316 $$txt .= join('', map { "\tgit clone --mirror $_\n" } @urls);
# {address} may be an array ref or a plain string here; array refs are
# joined with spaces.  A defined $max selects -V2, otherwise -V1.
317 if (my $addrs = $ibx->{address}) {
318 $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
319 my $v = defined $max ? '-V2' : '-V1';
322 # If you have public-inbox 1.1+ installed, you may
323 # initialize and index your mirror using the following commands:
324 public-inbox-init $v $ibx->{name} $dir/ $base_url \\
326 public-inbox-index $dir
329 } else { # PublicInbox::ExtSearch
331 This is an external index which is an amalgamation of several public inboxes.
332 Each public inbox needs to be mirrored individually.
334 my $v = $ctx->{www}->{pi_cfg}->{lc('publicInbox.wwwListing')};
335 if (($v // '') =~ /\A(?:all|match=domain)\z/) {
337 A list of them is available at $top_url
341 my $cfg_link = "$base_url/_/text/config/raw";
344 Example config snippet for mirrors: $cfg_link
346 if ($ibx->can('imap_url')) {
347 my $imap = $ibx->imap_url($ctx);
350 $$txt .= 'IMAP subfolder(s) available under:';
351 $$txt .= "\n\t" . join("\n\t", @$imap) . "\n";
353 # each subfolder (starting with `0') holds 50K messages at most
357 if ($ibx->can('nntp_url')) {
358 my $nntp = $ibx->nntp_url($ctx);
361 $$txt .= @$nntp == 1 ? 'Newsgroup' : 'Newsgroups are';
362 $$txt .= ' available over NNTP:';
363 $$txt .= "\n\t" . join("\n\t", @$nntp) . "\n";
# Tested against everything appended so far: a URL whose host is a
# single word followed by ".onion/".
366 if ($$txt =~ m!\b[^:]+://\w+\.onion/!) {
369 note: .onion URLs require Tor: https://www.torproject.org/
373 my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL);
374 $$txt .= join("\n\n",
375 coderepos_raw($ctx, $top_url), # may be empty
376 "AGPL code for this site:\n\tgit clone $code_url");
# Build the plain-text document selected by $key into $$txt.
#   $ctx - request context; {ibx} and {env} are read
#   $key - document name: "mirror", "color", "config" or "help"
#   $hdr - response header array ref, forwarded to the config generators
#   $txt - scalar ref the text is appended to
# "mirror", "color" and "config" are delegated and return whatever the
# helper returns; for "config", can('cloneurl') on the inbox object
# selects inbox_config over extindex_config.  Any other key except
# "help" returns undef / the empty list.  The help text covers
# Message-ID URLs, search (prefix table from _srch_prefix), message
# threading and contact information.
380 sub _default_text ($$$$) {
381 my ($ctx, $key, $hdr, $txt) = @_;
382 if ($key eq 'mirror') {
383 return _mirror_help($ctx, $txt);
384 } elsif ($key eq 'color') {
385 return _colors_help($ctx, $txt);
386 } elsif ($key eq 'config') {
387 return $ctx->{ibx}->can('cloneurl') ?
388 inbox_config($ctx, $hdr, $txt) :
389 extindex_config($ctx, $hdr, $txt);
392 return if $key ne 'help'; # TODO more keys?
394 my $ibx = $ctx->{ibx};
395 my $base_url = $ibx->base_url($ctx->{env});
396 $$txt .= "public-inbox help for $base_url\n";
402 public-inbox uses Message-ID identifiers in URLs.
403 One may look up messages by substituting Message-IDs
404 (without the leading '<' or trailing '>') into the URL.
405 Forward slash ('/') characters in the Message-IDs
406 need to be escaped as "%2F" (without quotes).
408 Thus, it is possible to retrieve any message by its
409 Message-ID by going to:
411 $base_url<Message-ID>/
413 (without the '<' or '>')
415 Message-IDs are described at:
421 # n.b. we use the Xapian DB for any regeneratable,
422 # order-of-arrival-independent data.
423 my $srch = $ibx->isrch;
429 This public-inbox has search functionality provided by Xapian.
431 It supports typical AND, OR, NOT, '+', '-' queries present
432 in other search engines.
434 We also support search prefixes to limit the scope of the
435 search to certain fields.
437 Prefixes supported in this installation include:
440 _srch_prefix($srch, $txt);
444 Most prefixes are probabilistic, meaning they support stemming
445 and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes
446 do not support stemming or wildcards.
447 The upstream Xapian query parser documentation fully explains
454 my $over = $ibx->over;
460 Message threading is enabled for this public-inbox,
461 additional endpoints for message threads are available:
463 * $base_url<Message-ID>/T/#u
465 Loads the thread belonging to the given <Message-ID>
466 in flat chronological order. The "#u" anchor
467 focuses the browser on the given <Message-ID>.
469 * $base_url<Message-ID>/t/#u
471 Loads the thread belonging to the given <Message-ID>
472 in threaded order with nesting. For deep threads,
473 this requires a wide display or horizontal scrolling.
475 Both of these HTML endpoints are suitable for offline reading
476 using the thread overview at the bottom of each page.
478 Users of feed readers may follow a particular thread using:
480 * $base_url<Message-ID>/t.atom
482 Which loads the thread in Atom Syndication Standard
483 described at Wikipedia and RFC4287:
485 $WIKI_URL/Atom_(standard)
486 https://tools.ietf.org/html/rfc4287
488 Atom Threading Extensions (RFC4685) is supported:
490 https://tools.ietf.org/html/rfc4685
492 Finally, the gzipped mbox for a thread is available for
493 downloading and importing into your favorite mail client:
495 * $base_url<Message-ID>/t.mbox.gz
497 We use the mboxrd variant of the mbox format described
509 This help text is maintained by public-inbox developers
510 reachable via plain-text email at: meta\@public-inbox.org
511 Their inbox is archived at: https://public-inbox.org/meta/
514 # TODO: support admin contact info in ~/.public-inbox/config