1 # Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # used for displaying help texts and other non-mail content
5 package PublicInbox::WwwText;
8 use PublicInbox::Linkify;
9 use PublicInbox::WwwStream;
10 use PublicInbox::Hval qw(ascii_html prurl);
11 use HTTP::Date qw(time2str);
12 use URI::Escape qw(uri_escape_utf8);
13 use PublicInbox::GzipFilter qw(gzf_maybe);
# URL of the upstream Xapian query parser documentation
14 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
# base URL for Wikipedia links; an article name is appended after a "/"
15 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
17 require PublicInbox::HlMod;
18 PublicInbox::HlMod->new
21 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
22 # Keys handled by _default_text: "help", "mirror", "color", "config"
27 $key //= 'help'; # this 302s to _/text/help/
29 # get the raw text the same way we get mboxrds
# s/// is true only when a trailing "/raw" was actually stripped from $key
30 my $raw = ($key =~ s!/raw\z!!);
# NOTE(review): perlsyn documents `my $x = ... if COND` (a `my` with a
# statement modifier) as having undefined behavior; consider declaring
# $have_tslash first and assigning it in a separate statement.
31 my $have_tslash = ($key =~ s!/\z!!) if !$raw;
# index 3 (the undef after 'Content-Length') is filled in below once the
# final body length is known
34 my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
# _default_text returns false for keys it does not handle
35 if (!_default_text($ctx, $key, $hdr, \$txt)) {
37 $txt = "404 Not Found ($key)\n";
39 my $env = $ctx->{env};
42 my $gzf = gzf_maybe($hdr, $env);
43 $txt = $gzf->translate($txt);
46 $hdr->[3] = length($txt);
47 return [ $code, $hdr, [ $txt ] ]
50 # enforce trailing slash for "wget -r" compatibility
51 if (!$have_tslash && $code == 200) {
52 my $url = $ctx->{ibx}->base_url($env);
53 $url .= "_/text/$key/";
55 return [ 302, [ 'Content-Type', 'text/plain',
57 [ "Redirecting to $url\n" ] ];
60 # Follow git commit message conventions,
61 # first line is the Subject/title
62 my ($title) = ($txt =~ /\A([^\n]*)/s);
63 $ctx->{-title_html} = ascii_html($title);
# tr with identical search and replacement lists only counts the "/" in $key
64 my $nslash = ($key =~ tr!/!/!);
# three levels up, plus one more "../" for every slash inside the key
65 $ctx->{-upfx} = '../../../' . ('../' x $nslash);
66 my $l = PublicInbox::Linkify->new;
69 $hl->do_hl_text(\$txt);
71 $txt = ascii_html($txt);
# the text is HTML-escaped above, then linkified and wrapped in <pre>
73 $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
74 PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt);
# Formats the search-prefix help obtained from $srch->help.
#   $srch - search object providing ->help, an array ref that is walked
#           two elements at a time as (prefix, text) pairs
#   $txt  - second argument; how it is used is not visible in this excerpt
# The text parts are collected in $htxt and then re-indented in place.
77 sub _srch_prefix ($$) {
78 my ($srch, $txt) = @_;
81 my $help = $srch->help;
83 for ($i = 0; $i < @$help; $i += 2) {
84 my $pfx = $help->[$i];
# $pad keeps the largest $n seen so far
86 $pad = $n if $n > $pad;
88 $htxt .= $help->[$i + 1];
# indent every line of the collected text by $pad + 8 spaces
92 my $padding = ' ' x ($pad + 8);
93 $htxt =~ s/^/$padding/gms;
# an indented line starting with a non-space token followed by NUL has its
# indent and the NUL replaced: the token is kept and padded with spaces to
# a width of $pad
94 $htxt =~ s/^$padding(\S+)\0/" $1".
95 (' ' x ($pad - length($1)))/egms;
# remove a form feed that directly precedes a newline
96 $htxt =~ s/\f\n/\n/gs;
# Appends the color customization help to the string referenced by $txt.
#   $ctx - request context; reads $ctx->{ibx} and $ctx->{env}
#   $txt - scalar ref that receives the generated text
# The output starts with a title line naming the inbox base URL, points
# readers at ${base_url}userContent.css, and ends with the output of
# PublicInbox::UserContent::sample() followed by a "```" line.
101 sub _colors_help ($$) {
102 my ($ctx, $txt) = @_;
103 my $ibx = $ctx->{ibx};
104 my $env = $ctx->{env};
105 my $base_url = $ibx->base_url($env);
106 $$txt .= "color customization for $base_url\n";
109 public-inbox provides a stable set of CSS classes for users to
110 customize colors for highlighting diffs and code.
112 Users of browsers such as dillo, Firefox, or some browser
113 extensions may start by downloading the following sample CSS file
114 to control the colors they see:
116 ${base_url}userContent.css
122 $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
125 # git-config section names are quoted in the config file, so escape them
# every literal backslash in $name becomes two backslashes
128 $name =~ s/\\/\\\\/g;
# Appends the coderepo part of an example config to the string referenced
# by $txt.
#   $ctx - request context; reads $ctx->{ibx}->{coderepo} (array ref of
#          coderepo names) and $ctx->{www}->{pi_cfg}
#   $txt - scalar ref that receives the generated text
# Returns immediately, adding nothing, when no coderepo list is defined.
# Otherwise emits one "coderepo = NAME" line per entry, an explanatory
# comment, and a [coderepo "NAME"] section per entry with example
# `dir' and `cgiturl' values.
133 sub _coderepo_config ($$) {
134 my ($ctx, $txt) = @_;
135 my $cr = $ctx->{ibx}->{coderepo} // return;
136 # note: this doesn't preserve cgitrc layout, since we parse cgitrc
137 # and drop the original structure
138 $$txt .= "\tcoderepo = $_\n" for @$cr;
141 ; `coderepo' entries allows blob reconstruction via patch emails if
142 ; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@'
143 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
144 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
146 my $pi_cfg = $ctx->{www}->{pi_cfg};
147 for my $cr_name (@$cr) {
148 my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
149 my $path = "/path/to/$cr_name";
# NOTE(review): $cr_name is the foreach alias of an element of @$cr, so
# this assignment also rewrites the entry stored in
# $ctx->{ibx}->{coderepo}; a name containing a backslash would be escaped
# again on every call. Escaping into a separate lexical would avoid that.
150 $cr_name = dq_escape($cr_name);
152 $$txt .= qq([coderepo "$cr_name"]\n);
153 if ($urls && scalar(@$urls)) {
# one "git clone URL DST" suggestion per configured cgiturl, joined so
# that each continuation stays on a ";"-prefixed line
155 $$txt .= join(" ||\n\t;\t", map {;
# NOTE(review): the character class is not negated, so `!~' is true only
# when $path contains none of these characters. $path always starts with
# "/path/to/", so this branch looks unreachable; presumably
# `=~ m![^a-z0-9_/\.\-]!i' was intended -- confirm.
157 if ($path !~ m![a-z0-9_/\.\-]!i) {
158 $dst = '"'.dq_escape($dst).'"';
160 qq(git clone $_ $dst);
164 $$txt .= "\tdir = $path\n";
165 $$txt .= "\tcgiturl = https://example.com/";
# percent-encode everything outside the listed safe set; at this point
# $cr_name holds the dq_escape()d name
166 $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
170 # n.b. this is a perfect candidate for memoization
# Builds an example [publicinbox "NAME"] config snippet for mirroring the
# inbox in $ctx->{ibx}.
#   $ctx - request context; reads $ctx->{ibx} and $ctx->{env}
#   $hdr - array ref of response headers; Content-Disposition is always
#          pushed, and Last-Modified when a creation time is available
#   $txt - scalar ref that receives the generated text
171 sub inbox_config ($$$) {
172 my ($ctx, $hdr, $txt) = @_;
173 my $ibx = $ctx->{ibx};
174 push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
# eval leaves $t undefined if the call chain dies
175 my $t = eval { $ibx->mm->created_at };
176 push(@$hdr, 'Last-Modified', time2str($t)) if $t;
177 my $name = dq_escape($ibx->{name});
178 my $inboxdir = '/path/to/top-level-inbox';
179 my $base_url = $ibx->base_url($ctx->{env});
181 ; Example public-inbox config snippet for a mirror of
183 ; See public-inbox-config(5) manpage for more details:
184 ; https://public-inbox.org/public-inbox-config.html
185 [publicinbox "$name"]
187 ; note: public-inbox before v1.2.0 used `mainrepo' instead of
188 ; `inboxdir', both remain supported after 1.2
190 url = https://example.com/$name/
191 url = http://example.onion/$name/
# these keys hold array refs; one config line is emitted per value
193 for my $k (qw(address listid infourl watchheader)) {
194 defined(my $v = $ibx->{$k}) or next;
195 $$txt .= "\t$k = $_\n" for @$v;
197 if (my $altid = $ibx->{altid}) {
198 my $altid_map = $ibx->altid_map;
200 ; altid DBs may be used to provide numeric article ID lookup from
201 ; old, pre-existing sources. You can recreate them via curl(1),
202 ; gzip(1), and sqlite3(1) as documented:
# sorted so the altid entries come out in a stable order
204 for (sort keys %$altid_map) {
205 $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
206 "\t;\tgzip -dc | \\\n" .
207 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
208 $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
# these keys are interpolated directly, one line per key
212 for my $k (qw(filter newsgroup obfuscate replyto)) {
213 defined(my $v = $ibx->{$k}) or next;
214 $$txt .= "\t$k = $v\n";
216 $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
217 $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
218 _coderepo_config($ctx, $txt);
222 # n.b. this is a perfect candidate for memoization
# Builds an example config snippet for the external index in $ctx->{ibx}.
#   $ctx - request context; reads $ctx->{ibx} and $ctx->{env}
#   $hdr - array ref of response headers; Content-Disposition is pushed
#   $txt - scalar ref that receives the generated text
223 sub extindex_config ($$$) {
224 my ($ctx, $hdr, $txt) = @_;
225 my $ibx = $ctx->{ibx};
226 push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
227 my $name = dq_escape($ibx->{name});
228 my $base_url = $ibx->base_url($ctx->{env});
230 ; Example public-inbox config snippet for the external index (extindex) at:
232 ; See public-inbox-config(5)manpage for more details:
233 ; https://public-inbox.org/public-inbox-config.html
235 topdir = /path/to/extindex-topdir
236 url = https://example.com/$name/
237 url = http://example.onion/$name/
# unlike inbox_config, the value is interpolated directly here
239 for my $k (qw(infourl)) {
240 defined(my $v = $ibx->{$k}) or next;
241 $$txt .= "\t$k = $v\n";
243 _coderepo_config($ctx, $txt);
# Returns a list (@ret) describing the code repositories associated with
# $ctx->{ibx}, or an empty list when no coderepo list is defined.
#   $ctx     - request context; reads $ctx->{ibx}, $ctx->{www}->{pi_cfg}
#              and $ctx->{env}
#   $top_url - second argument; its use is not visible in this excerpt
#              (presumably the prefix for relative URLs -- confirm)
247 sub coderepos_raw ($$) {
248 my ($ctx, $top_url) = @_;
249 my $cr = $ctx->{ibx}->{coderepo} // return ();
250 my $cfg = $ctx->{www}->{pi_cfg};
252 for my $cr_name (@$cr) {
# wording depends on whether the object provides a cloneurl method
254 my $thing = $ctx->{ibx}->can('cloneurl') ?
255 'public inbox' : 'external index';
257 Code repositories for project(s) associated with this $thing
260 my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl");
263 # relative or absolute URL?, prefix relative
264 # "foo.git" with appropriate number of "../"
265 my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ :
267 $ret[0] .= "\n\t" . prurl($ctx->{env}, $u);
270 $ret[0] .= qq[\n\t$cr_name.git (no URL configured)];
273 @ret; # may be empty, this sub is called as an arg for join()
# Appends mirroring instructions to the string referenced by $txt.
#   $ctx - request context; reads $ctx->{ibx}, $ctx->{env} and
#          $ctx->{www}->{pi_cfg}
#   $txt - scalar ref that receives the generated text
# Objects providing `cloneurl' get "git clone --mirror" commands and, when
# addresses are set, init/index commands; other objects get an
# external-index notice. A link to the example config, any IMAP/NNTP
# endpoints, a Tor note when an .onion URL appears, the associated code
# repositories and the site's own code URL follow.
276 sub _mirror_help ($$) {
277 my ($ctx, $txt) = @_;
278 my $ibx = $ctx->{ibx};
279 my $base_url = $ibx->base_url($ctx->{env});
280 chop $base_url; # no trailing slash for "git clone"
# last path component of the base URL
281 my $dir = (split(m!/!, $base_url))[-1];
282 my %seen = ($base_url => 1);
# strip the last path component, keeping the trailing slash of the parent
283 my $top_url = $base_url;
284 $top_url =~ s!/[^/]+\z!/!;
285 $$txt .= "public-inbox mirroring instructions\n\n";
286 if ($ibx->can('cloneurl')) { # PublicInbox::Inbox
288 "This public inbox may be cloned and mirrored by anyone:\n";
290 my $max = $ibx->max_git_epoch;
291 # TODO: some of these URLs may be too long and we may need to
292 # do something like code_footer() above, but these are local
294 if (defined($max)) { # v2
295 for my $i (0..$max) {
296 # old epochs may be deleted:
297 -d "$ibx->{inboxdir}/git/$i.git" or next;
298 my $url = "$base_url/$i";
300 push @urls, "$url $dir/git/$i.git";
302 my $nr = scalar(@urls);
305 $$txt .= "# this inbox consists of $nr epochs:";
306 $urls[0] .= " # oldest";
307 $urls[-1] .= " # newest";
310 push @urls, $base_url;
312 # FIXME: epoch splits can be different in other repositories,
313 # use the "cloneurl" file as-is for now:
314 for my $u (@{$ibx->cloneurl}) {
319 $$txt .= join('', map { "\tgit clone --mirror $_\n" } @urls);
320 if (my $addrs = $ibx->{address}) {
321 $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
# a defined max epoch marks a v2 inbox (see the "# v2" branch above)
322 my $v = defined $max ? '-V2' : '-V1';
325 # If you have public-inbox 1.1+ installed, you may
326 # initialize and index your mirror using the following commands:
327 public-inbox-init $v $ibx->{name} $dir/ $base_url \\
329 public-inbox-index $dir
332 } else { # PublicInbox::ExtSearch
334 This is an external index which is an amalgamation of several public inboxes.
335 Each public inbox needs to be mirrored individually.
# the config key is looked up in lower case
337 my $v = $ctx->{www}->{pi_cfg}->{lc('publicInbox.wwwListing')};
338 if (($v // '') =~ /\A(?:all|match=domain)\z/) {
340 A list of them is available at $top_url
344 my $cfg_link = "$base_url/_/text/config/raw";
347 Example config snippet for mirrors: $cfg_link
349 if ($ibx->can('imap_url')) {
350 my $imap = $ibx->imap_url($ctx);
353 $$txt .= 'IMAP subfolder(s) available under:';
354 $$txt .= "\n\t" . join("\n\t", @$imap) . "\n";
356 # each subfolder (starting with `0') holds 50K messages at most
360 if ($ibx->can('nntp_url')) {
361 my $nntp = $ibx->nntp_url($ctx);
364 $$txt .= @$nntp == 1 ? 'Newsgroup' : 'Newsgroups are';
365 $$txt .= ' available over NNTP:';
366 $$txt .= "\n\t" . join("\n\t", @$nntp) . "\n";
# scans everything generated so far for a URL on an .onion host
369 if ($$txt =~ m!\b[^:]+://\w+\.onion/!) {
372 note: .onion URLs require Tor: https://www.torproject.org/
376 my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL);
377 $$txt .= join("\n\n",
378 coderepos_raw($ctx, $top_url), # may be empty
379 "AGPL code for this site:\n\tgit clone $code_url");
# Generates the text for a /_/text/$KEY/ page into the string referenced
# by $txt.
#   $ctx - request context; reads $ctx->{ibx} and $ctx->{env}
#   $key - page key: "mirror", "color", "config" or "help"
#   $hdr - array ref of response headers, passed on to the config
#          generators
#   $txt - scalar ref that receives the generated text
# "mirror", "color" and "config" return whatever their generator returns.
# Any key other than those and "help" returns empty, which the caller
# turns into a 404.
383 sub _default_text ($$$$) {
384 my ($ctx, $key, $hdr, $txt) = @_;
385 if ($key eq 'mirror') {
386 return _mirror_help($ctx, $txt);
387 } elsif ($key eq 'color') {
388 return _colors_help($ctx, $txt);
389 } elsif ($key eq 'config') {
# objects providing `cloneurl' get the inbox snippet, others the extindex one
390 return $ctx->{ibx}->can('cloneurl') ?
391 inbox_config($ctx, $hdr, $txt) :
392 extindex_config($ctx, $hdr, $txt);
395 return if $key ne 'help'; # TODO more keys?
397 my $ibx = $ctx->{ibx};
398 my $base_url = $ibx->base_url($ctx->{env});
399 $$txt .= "public-inbox help for $base_url\n";
405 public-inbox uses Message-ID identifiers in URLs.
406 One may look up messages by substituting Message-IDs
407 (without the leading '<' or trailing '>') into the URL.
408 Forward slash ('/') characters in the Message-IDs
409 need to be escaped as "%2F" (without quotes).
411 Thus, it is possible to retrieve any message by its
412 Message-ID by going to:
414 $base_url<Message-ID>/
416 (without the '<' or '>')
418 Message-IDs are described at:
424 # n.b. we use the Xapian DB for any regeneratable,
425 # order-of-arrival-independent data.
426 my $srch = $ibx->isrch;
432 This public-inbox has search functionality provided by Xapian.
434 It supports typical AND, OR, NOT, '+', '-' queries present
435 in other search engines.
437 We also support search prefixes to limit the scope of the
438 search to certain fields.
440 Prefixes supported in this installation include:
443 _srch_prefix($srch, $txt);
447 Most prefixes are probabilistic, meaning they support stemming
448 and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes
449 do not support stemming or wildcards.
450 The upstream Xapian query parser documentation fully explains
457 my $over = $ibx->over;
463 Message threading is enabled for this public-inbox,
464 additional endpoints for message threads are available:
466 * $base_url<Message-ID>/T/#u
468 Loads the thread belonging to the given <Message-ID>
469 in flat chronological order. The "#u" anchor
470 focuses the browser on the given <Message-ID>.
472 * $base_url<Message-ID>/t/#u
474 Loads the thread belonging to the given <Message-ID>
475 in threaded order with nesting. For deep threads,
476 this requires a wide display or horizontal scrolling.
478 Both of these HTML endpoints are suitable for offline reading
479 using the thread overview at the bottom of each page.
481 Users of feed readers may follow a particular thread using:
483 * $base_url<Message-ID>/t.atom
485 Which loads the thread in Atom Syndication Standard
486 described at Wikipedia and RFC4287:
488 $WIKI_URL/Atom_(standard)
489 https://tools.ietf.org/html/rfc4287
491 Atom Threading Extensions (RFC4685) is supported:
493 https://tools.ietf.org/html/rfc4685
495 Finally, the gzipped mbox for a thread is available for
496 downloading and importing into your favorite mail client:
498 * $base_url<Message-ID>/t.mbox.gz
500 We use the mboxrd variant of the mbox format described
512 This help text is maintained by public-inbox developers
513 reachable via plain-text email at: meta\@public-inbox.org
514 Their inbox is archived at: https://public-inbox.org/meta/
517 # TODO: support admin contact info in ~/.public-inbox/config