1 # Copyright (C) all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # used for displaying help texts and other non-mail content
5 package PublicInbox::WwwText;
8 use PublicInbox::Linkify;
9 use PublicInbox::WwwStream;
10 use PublicInbox::Hval qw(ascii_html prurl);
11 use HTTP::Date qw(time2str);
12 use URI::Escape qw(uri_escape_utf8);
13 use PublicInbox::GzipFilter qw(gzf_maybe);
14 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
15 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
17 require PublicInbox::HlMod;
18 PublicInbox::HlMod->new
21 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
22 # For now, "help" is the only supported $KEY
27 $key //= 'help'; # this 302s to _/text/help/
29 # get the raw text the same way we get mboxrds
30 my $raw = ($key =~ s!/raw\z!!);
31 my $have_tslash = ($key =~ s!/\z!!) if !$raw;
34 my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
35 if (!_default_text($ctx, $key, $hdr, \$txt)) {
37 $txt = "404 Not Found ($key)\n";
39 my $env = $ctx->{env};
41 $txt = gzf_maybe($hdr, $env)->zflush($txt) if $code == 200;
42 $hdr->[3] = length($txt);
43 return [ $code, $hdr, [ $txt ] ]
46 # enforce trailing slash for "wget -r" compatibility
47 if (!$have_tslash && $code == 200) {
48 my $url = $ctx->{ibx}->base_url($env);
49 $url .= "_/text/$key/";
51 return [ 302, [ 'Content-Type', 'text/plain',
53 [ "Redirecting to $url\n" ] ];
56 # Follow git commit message conventions,
57 # first line is the Subject/title
58 my ($title) = ($txt =~ /\A([^\n]*)/s);
59 $ctx->{-title_html} = ascii_html($title);
60 my $nslash = ($key =~ tr!/!/!);
61 $ctx->{-upfx} = '../../../' . ('../' x $nslash);
62 my $l = PublicInbox::Linkify->new;
65 $hl->do_hl_text(\$txt);
67 $txt = ascii_html($txt);
69 $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
70 PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt);
73 sub _srch_prefix ($$) {
77 my $help = $ibx->isrch->help;
79 for ($i = 0; $i < @$help; $i += 2) {
80 my $pfx = $help->[$i];
82 $pad = $n if $n > $pad;
84 $htxt .= $help->[$i + 1];
88 my $padding = ' ' x ($pad + 4);
89 $htxt =~ s/^/$padding/gms;
90 $htxt =~ s/^$padding(\S+)\0/" $1".(' ' x ($pad - length($1)))/egms;
91 $htxt =~ s/\f\n/\n/gs;
96 sub _colors_help ($$) {
98 my $ibx = $ctx->{ibx};
99 my $env = $ctx->{env};
100 my $base_url = $ibx->base_url($env);
101 $$txt .= "color customization for $base_url\n";
104 public-inbox provides a stable set of CSS classes for users to
105 customize colors for highlighting diffs and code.
107 Users of browsers such as dillo, Firefox, or some browser
108 extensions may start by downloading the following sample CSS file
109 to control the colors they see:
111 ${base_url}userContent.css
117 $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
120 # git-config section names are quoted in the config file, so escape them
123 $name =~ s/\\/\\\\/g;
128 sub _coderepo_config ($$) {
129 my ($ctx, $txt) = @_;
130 my $cr = $ctx->{ibx}->{coderepo} // return;
131 # note: this doesn't preserve cgitrc layout, since we parse cgitrc
132 # and drop the original structure
133 $$txt .= "\tcoderepo = $_\n" for @$cr;
136 ; `coderepo' entries allows blob reconstruction via patch emails if
137 ; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@'
138 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
139 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
141 my $pi_cfg = $ctx->{www}->{pi_cfg};
142 for my $cr_name (@$cr) {
143 my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
144 my $path = "/path/to/$cr_name";
145 $cr_name = dq_escape($cr_name);
147 $$txt .= qq([coderepo "$cr_name"]\n);
148 if ($urls && scalar(@$urls)) {
150 $$txt .= join(" ||\n\t;\t", map {;
152 if ($path !~ m![a-z0-9_/\.\-]!i) {
153 $dst = '"'.dq_escape($dst).'"';
155 qq(git clone $_ $dst);
159 $$txt .= "\tdir = $path\n";
160 $$txt .= "\tcgiturl = https://example.com/";
161 $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
165 # n.b. this is a perfect candidate for memoization
166 sub inbox_config ($$$) {
167 my ($ctx, $hdr, $txt) = @_;
168 my $ibx = $ctx->{ibx};
169 push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
170 my $t = eval { $ibx->mm->created_at };
171 push(@$hdr, 'Last-Modified', time2str($t)) if $t;
172 my $name = dq_escape($ibx->{name});
173 my $inboxdir = '/path/to/top-level-inbox';
174 my $base_url = $ibx->base_url($ctx->{env});
176 ; Example public-inbox config snippet for a mirror of
178 ; See public-inbox-config(5) manpage for more details:
179 ; https://public-inbox.org/public-inbox-config.html
180 [publicinbox "$name"]
182 ; note: public-inbox before v1.2.0 used `mainrepo' instead of
183 ; `inboxdir', both remain supported after 1.2
185 url = https://example.com/$name/
186 url = http://example.onion/$name/
188 for my $k (qw(address listid infourl watchheader)) {
189 defined(my $v = $ibx->{$k}) or next;
190 $$txt .= "\t$k = $_\n" for @$v;
192 if (my $altid = $ibx->{altid}) {
193 my $altid_map = $ibx->altid_map;
195 ; altid DBs may be used to provide numeric article ID lookup from
196 ; old, pre-existing sources. You can recreate them via curl(1),
197 ; gzip(1), and sqlite3(1) as documented:
199 for (sort keys %$altid_map) {
200 $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
201 "\t;\tgzip -dc | \\\n" .
202 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
203 $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
207 for my $k (qw(filter newsgroup obfuscate replyto)) {
208 defined(my $v = $ibx->{$k}) or next;
209 $$txt .= "\t$k = $v\n";
211 $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
212 $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
213 _coderepo_config($ctx, $txt);
217 # n.b. this is a perfect candidate for memoization
218 sub extindex_config ($$$) {
219 my ($ctx, $hdr, $txt) = @_;
220 my $ibx = $ctx->{ibx};
221 push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
222 my $name = dq_escape($ibx->{name});
223 my $base_url = $ibx->base_url($ctx->{env});
225 ; Example public-inbox config snippet for the external index (extindex) at:
227 ; See public-inbox-config(5)manpage for more details:
228 ; https://public-inbox.org/public-inbox-config.html
230 topdir = /path/to/extindex-topdir
231 url = https://example.com/$name/
232 url = http://example.onion/$name/
234 for my $k (qw(infourl)) {
235 defined(my $v = $ibx->{$k}) or next;
236 $$txt .= "\t$k = $v\n";
238 _coderepo_config($ctx, $txt);
242 sub coderepos_raw ($$) {
243 my ($ctx, $top_url) = @_;
244 my $cr = $ctx->{ibx}->{coderepo} // return ();
245 my $cfg = $ctx->{www}->{pi_cfg};
247 for my $cr_name (@$cr) {
249 my $thing = $ctx->{ibx}->can('cloneurl') ?
250 'public inbox' : 'external index';
252 Code repositories for project(s) associated with this $thing
255 my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl");
258 # relative or absolute URL? Prefix a relative
259 # "foo.git" with the appropriate number of "../"
260 my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ :
262 $ret[0] .= "\n\t" . prurl($ctx->{env}, $u);
265 $ret[0] .= qq[\n\t$cr_name.git (no URL configured)];
268 @ret; # may be empty, this sub is called as an arg for join()
271 sub _add_non_http_urls ($$) {
272 my ($ctx, $txt) = @_;
273 $ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP
274 my $urls = $ctx->{ibx}->imap_url($ctx);
276 $$txt .= "\nIMAP subfolder(s) are available under:";
277 $$txt .= "\n " . join("\n ", @$urls);
280 # each subfolder (starting with `0') holds 50K messages at most
283 $urls = $ctx->{ibx}->nntp_url($ctx);
285 $$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are";
286 $$txt .= ' available over NNTP:';
287 $$txt .= "\n " . join("\n ", @$urls) . "\n";
289 $urls = $ctx->{ibx}->pop3_url($ctx);
291 $urls = join("\n ", @$urls);
294 POP3 access is available:
297 The password is: anonymous
298 The username is: \$(uuidgen)\@$ctx->{ibx}->{newsgroup}
299 where \$(uuidgen) in the output of the `uuidgen' command on your system.
300 The UUID in the username functions as a private cookie (don't share it).
301 Idle accounts will expire periodically.
306 sub _add_onion_note ($) {
308 $$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <<EOM
310 note: .onion URLs require Tor: https://www.torproject.org/
315 sub _mirror_help ($$) {
316 my ($ctx, $txt) = @_;
317 my $ibx = $ctx->{ibx};
318 my $base_url = $ibx->base_url($ctx->{env});
319 chop $base_url; # no trailing slash for "git clone"
320 my $dir = (split(m!/!, $base_url))[-1];
321 my %seen = ($base_url => 1);
322 my $top_url = $base_url;
323 $top_url =~ s!/[^/]+\z!/!;
324 $$txt .= "public-inbox mirroring instructions\n\n";
325 if ($ibx->can('cloneurl')) { # PublicInbox::Inbox
327 "This public inbox may be cloned and mirrored by anyone:\n";
329 my $max = $ibx->max_git_epoch;
330 # TODO: some of these URLs may be too long and we may need to
331 # do something like code_footer() above, but these are local
333 if (defined($max)) { # v2
334 for my $i (0..$max) {
335 # old epochs may be deleted:
336 -d "$ibx->{inboxdir}/git/$i.git" or next;
337 my $url = "$base_url/$i";
339 push @urls, "$url $dir/git/$i.git";
341 my $nr = scalar(@urls);
343 chomp($$txt .= <<EOM);
345 # this inbox consists of $nr epochs: (no need to clone all of them)
347 $urls[0] .= " # oldest";
348 $urls[-1] .= " # newest";
351 push @urls, $base_url;
353 # FIXME: epoch splits can be different in other repositories,
354 # use the "cloneurl" file as-is for now:
355 for my $u (@{$ibx->cloneurl}) {
360 $$txt .= join('', map { " git clone --mirror $_\n" } @urls);
361 my $addrs = $ibx->{address} // 'inbox@example.com';
362 my $ng = $ibx->{newsgroup} // '';
363 substr($ng, 0, 0, ' --ng ') if $ng;
364 $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
365 my $v = defined $max ? '-V2' : '-V1';
368 # If you have public-inbox 1.1+ installed, you may
369 # initialize and index your mirror using the following commands:
370 public-inbox-init $v$ng \\
371 $ibx->{name} ./$dir $base_url \\
373 public-inbox-index ./$dir
375 } else { # PublicInbox::ExtSearch
377 This is an external index which is an amalgamation of several public inboxes.
378 Each public inbox needs to be mirrored individually.
380 my $v = $ctx->{www}->{pi_cfg}->{lc('publicInbox.wwwListing')};
381 if (($v // '') =~ /\A(?:all|match=domain)\z/) {
383 A list of them is available at $top_url
387 my $cfg_link = "$base_url/_/text/config/raw";
390 Example config snippet for mirrors: $cfg_link
392 _add_non_http_urls($ctx, $txt);
393 _add_onion_note($txt);
395 my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL);
396 $$txt .= join("\n\n",
397 coderepos_raw($ctx, $top_url), # may be empty
398 "AGPL code for this site:\n git clone $code_url");
402 sub _default_text ($$$$) {
403 my ($ctx, $key, $hdr, $txt) = @_;
404 if ($key eq 'mirror') {
405 return _mirror_help($ctx, $txt);
406 } elsif ($key eq 'color') {
407 return _colors_help($ctx, $txt);
408 } elsif ($key eq 'config') {
409 return $ctx->{ibx}->can('cloneurl') ?
410 inbox_config($ctx, $hdr, $txt) :
411 extindex_config($ctx, $hdr, $txt);
413 return if $key ne 'help'; # TODO more keys?
415 my $ibx = $ctx->{ibx};
416 my $base_url = $ibx->base_url($ctx->{env});
418 public-inbox help for $base_url
423 public-inbox uses Message-ID identifiers in URLs.
424 One may look up messages by substituting Message-IDs
425 (without the leading '<' or trailing '>') into the URL.
426 Forward slash ('/') characters in the Message-IDs
427 need to be escaped as "%2F" (without quotes).
429 Thus, it is possible to retrieve any message by its
430 Message-ID by going to:
432 $base_url<Message-ID>/
433 (without the '<' or '>')
435 Message-IDs are described at:
441 # n.b. we use the Xapian DB for any regeneratable,
442 # order-of-arrival-independent data.
448 This public-inbox has search functionality provided by Xapian.
450 It supports typical AND, OR, NOT, '+', '-' queries present
451 in other search engines.
453 We also support search prefixes to limit the scope of the
454 search to certain fields.
456 Prefixes supported in this installation include:
459 _srch_prefix($ibx, $txt);
462 Most prefixes are probabilistic, meaning they support stemming
463 and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes
464 do not support stemming or wildcards.
465 The upstream Xapian query parser documentation fully explains
477 Message threading is enabled for this public-inbox,
478 additional endpoints for message threads are available:
480 * $base_url<Message-ID>/T/#u
482 Loads the thread belonging to the given <Message-ID>
483 in flat chronological order. The "#u" anchor
484 focuses the browser on the given <Message-ID>.
486 * $base_url<Message-ID>/t/#u
488 Loads the thread belonging to the given <Message-ID>
489 in threaded order with nesting. For deep threads,
490 this requires a wide display or horizontal scrolling.
492 Both of these HTML endpoints are suitable for offline reading
493 using the thread overview at the bottom of each page.
495 The gzipped mbox for a thread is available for downloading and
496 importing into your favorite mail client:
498 * $base_url<Message-ID>/t.mbox.gz
500 We use the mboxrd variant of the mbox format described at:
504 Users of feed readers may follow a particular thread using:
506 * $base_url<Message-ID>/t.atom
508 Which loads the thread in Atom Syndication Standard
509 described at Wikipedia and RFC4287:
511 $WIKI_URL/Atom_(standard)
512 https://tools.ietf.org/html/rfc4287
514 Atom Threading Extensions (RFC4685) are supported:
516 https://tools.ietf.org/html/rfc4685
521 _add_non_http_urls($ctx, \(my $note = ''));
522 $note and $note =~ s/^/ /gms and $$txt .= <<EOF;
531 This help text is maintained by public-inbox developers
532 reachable via plain-text email at: meta\@public-inbox.org
533 Their inbox is archived at: https://public-inbox.org/meta/
535 # TODO: support admin contact info in ~/.public-inbox/config