1 # Copyright (C) all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # used for displaying help texts and other non-mail content
5 package PublicInbox::WwwText;
8 use PublicInbox::Linkify;
9 use PublicInbox::WwwStream;
10 use PublicInbox::Hval qw(ascii_html prurl);
11 use HTTP::Date qw(time2str);
12 use URI::Escape qw(uri_escape_utf8);
13 use PublicInbox::GzipFilter qw(gzf_maybe);
# Base URLs of external documentation.  $WIKI_URL is interpolated into the
# generated help text further down in this file; $QP_URL is presumably
# referenced by the search help (its use is not visible in this listing).
14 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
15 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
# Loads PublicInbox::HlMod and instantiates it; the result is presumably the
# $hl object whose do_hl_text() is called when rendering HTML.
# NOTE(review): the statement enclosing these two lines is not visible here;
# confirm how a failed require is handled.
17 require PublicInbox::HlMod;
18 PublicInbox::HlMod->new
21 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
22 # For now, "help" is the only supported $KEY
# Builds the response for a text endpoint: a raw text/plain reply when the
# key ends in "/raw", a 302 to the trailing-slash URL when that slash is
# missing, or an HTML page otherwise.
# NOTE(review): the sub declaration and the initialisation of $ctx, $code
# and $txt are not visible in this listing; the comments below describe
# only the statements that are shown.
# NOTE(review): the "only supported $KEY" remark above looks stale, since
# _default_text() also handles 'mirror', 'color' and 'config'.
27 $key //= 'help'; # this 302s to _/text/help/
29 # get the raw text the same way we get mboxrds
# s/// is true only when the "/raw" suffix was present (and is now removed)
30 my $raw = ($key =~ s!/raw\z!!);
# NOTE(review): perlsyn documents "my ... if COND" as having undefined
# behaviour; declaring $have_tslash first and assigning it conditionally
# would avoid relying on that.
31 my $have_tslash = ($key =~ s!/\z!!) if !$raw;
# slot 3 is the Content-Length value; it is filled in on the raw path below
34 my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
# a false return from _default_text() means the key is not recognised
35 if (!_default_text($ctx, $key, $hdr, \$txt)) {
37 $txt = "404 Not Found ($key)\n";
39 my $env = $ctx->{env};
# raw reply: a successful body goes through gzf_maybe(...)->zflush first
# (presumably optional gzip compression -- confirm in GzipFilter), then
# Content-Length is taken from the final body and the
# [ status, headers, body ] triplet is returned
41 $txt = gzf_maybe($hdr, $env)->zflush($txt) if $code == 200;
42 $hdr->[3] = length($txt);
43 return [ $code, $hdr, [ $txt ] ]
46 # enforce trailing slash for "wget -r" compatibility
47 if (!$have_tslash && $code == 200) {
48 my $url = $ctx->{ibx}->base_url($env);
49 $url .= "_/text/$key/";
51 return [ 302, [ 'Content-Type', 'text/plain',
53 [ "Redirecting to $url\n" ] ];
56 # Follow git commit message conventions,
57 # first line is the Subject/title
58 my ($title) = ($txt =~ /\A([^\n]*)/s);
59 $ctx->{-title_html} = ascii_html($title);
# tr!/!/! counts the slashes in $key without changing it; every slash adds
# one more "../" to the relative prefix stored in -upfx
60 my $nslash = ($key =~ tr!/!/!);
61 $ctx->{-upfx} = '../../../' . ('../' x $nslash);
62 my $l = PublicInbox::Linkify->new;
# NOTE(review): the condition choosing between do_hl_text() and the plain
# ascii_html() escape is not visible in this listing.
65 $hl->do_hl_text(\$txt);
67 $txt = ascii_html($txt);
69 $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
# no /g flag: only the first occurrence of each word is turned into an anchor
70 $txt =~ s!\bPOP3\b!<a\nid=pop3>POP3</a>!;
71 $txt =~ s!\bNewsgroups\b!<a\nid=nntp>Newsgroups</a>!;
72 $txt =~ s!\bIMAP\b!<a\nid=imap>IMAP</a>!;
73 PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt);
# Formats the search-prefix help obtained from $ibx->isrch->help, which is
# read as a flat list of (prefix, description) pairs, into an aligned,
# indented table held in $htxt.
# NOTE(review): the argument unpacking, the initialisation of $pad, $htxt
# and $n, and the final use of $htxt (presumably appended to the caller's
# text buffer) are not visible in this listing.
76 sub _srch_prefix ($$) {
80 my $help = $ibx->isrch->help;
# step through the list two entries at a time: even index is the prefix,
# the following odd index is its description
82 for ($i = 0; $i < @$help; $i += 2) {
83 my $pfx = $help->[$i];
# keep the largest $n seen so far (presumably the prefix length) as the
# width of the prefix column
85 $pad = $n if $n > $pad;
87 $htxt .= $help->[$i + 1];
# indent every line by $padding, then rewrite each line that starts an
# entry (prefix followed by a NUL marker) so the prefix is left-aligned
# in a fixed-width column
91 my $padding = ' ' x ($pad + 4);
92 $htxt =~ s/^/$padding/gms;
93 $htxt =~ s/^$padding(\S+)\0/" $1".(' ' x ($pad - length($1)))/egms;
# drop the form-feed markers that precede newlines
94 $htxt =~ s/\f\n/\n/gs;
# Appends the "color customization" help page to the string referenced by
# $txt: a title line containing the base URL of $ctx->{ibx}, explanatory
# text that points readers at ${base_url}userContent.css, and finally the
# output of PublicInbox::UserContent::sample() followed by a closing
# "```" line.
# Arguments: $ctx (request context; {ibx} and {env} are read), $txt (scalar
# reference that is appended to).
99 sub _colors_help ($$) {
100 my ($ctx, $txt) = @_;
101 my $ibx = $ctx->{ibx};
102 my $env = $ctx->{env};
103 my $base_url = $ibx->base_url($env);
104 $$txt .= "color customization for $base_url\n";
107 public-inbox provides a stable set of CSS classes for users to
108 customize colors for highlighting diffs and code.
110 Users of browsers such as dillo, Firefox, or some browser
111 extensions may start by downloading the following sample CSS file
112 to control the colors they see:
114 ${base_url}userContent.css
120 $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
123 # git-config section names are quoted in the config file, so escape them
# doubles every backslash in $name
# NOTE(review): the sub declaration and any further escaping (for example
# of double quotes) are not visible in this listing.
126 $name =~ s/\\/\\\\/g;
# Appends the coderepo-related part of an example config to the string
# referenced by $txt: one "coderepo = NAME" line per entry of
# $ctx->{ibx}->{coderepo}, an explanatory comment block, and then one
# [coderepo "NAME"] section per entry with a placeholder dir and cgiturl.
# Returns early (bare return) when the inbox has no coderepo entries.
131 sub _coderepo_config ($$) {
132 my ($ctx, $txt) = @_;
133 my $cr = $ctx->{ibx}->{coderepo} // return;
134 # note: this doesn't preserve cgitrc layout, since we parse cgitrc
135 # and drop the original structure
136 $$txt .= "\tcoderepo = $_\n" for @$cr;
139 ; `coderepo' entries allows blob reconstruction via patch emails if
140 ; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@'
141 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
142 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
144 my $pi_cfg = $ctx->{www}->{pi_cfg};
145 for my $cr_name (@$cr) {
# the configured cgiturl values are looked up with the unescaped name
146 my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
# placeholder location; no real path is read here
147 my $path = "/path/to/$cr_name";
# NOTE(review): $cr_name aliases the current element of @$cr (foreach
# aliasing), so this assignment also rewrites the stored coderepo name in
# place -- confirm that is intended.
148 $cr_name = dq_escape($cr_name);
150 $$txt .= qq([coderepo "$cr_name"]\n);
151 if ($urls && scalar(@$urls)) {
# one "git clone URL DEST" suggestion per configured URL, joined with "||"
153 $$txt .= join(" ||\n\t;\t", map {;
# NOTE(review): $path always begins with "/path/to/", so it always contains
# a character from this class and the `!~` test cannot succeed; the quoting
# branch below looks unreachable.  A negated class with `=~` may have been
# intended -- confirm.
155 if ($path !~ m![a-z0-9_/\.\-]!i) {
156 $dst = '"'.dq_escape($dst).'"';
158 qq(git clone $_ $dst);
162 $$txt .= "\tdir = $path\n";
163 $$txt .= "\tcgiturl = https://example.com/";
# percent-encode everything outside the listed safe set; note the input is
# the already dq_escape()d name
164 $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
168 # n.b. this is a perfect candidate for memoization
# Appends an example [publicinbox "NAME"] config section for $ctx->{ibx}
# to the string referenced by $txt, and pushes Content-Disposition (plus
# Last-Modified when a creation time is available) onto the header array
# $hdr.  Paths and example URLs in the output are placeholders; the
# inbox's own base URL, altid names, mirror URLs and selected config keys
# are taken from $ibx.
169 sub inbox_config ($$$) {
170 my ($ctx, $hdr, $txt) = @_;
171 my $ibx = $ctx->{ibx};
172 push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
# eval: if created_at cannot be obtained, $t stays undef and no
# Last-Modified header is added
173 my $t = eval { $ibx->mm->created_at };
174 push(@$hdr, 'Last-Modified', time2str($t)) if $t;
# the name is escaped because it is placed inside double quotes in the
# section header
175 my $name = dq_escape($ibx->{name});
# fixed placeholder; the inbox's real directory is not emitted
176 my $inboxdir = '/path/to/top-level-inbox';
177 my $base_url = $ibx->base_url($ctx->{env});
179 ; Example public-inbox config snippet for a mirror of
181 ; See public-inbox-config(5) manpage for more details:
182 ; https://public-inbox.org/public-inbox-config.html
183 [publicinbox "$name"]
185 ; note: public-inbox before v1.2.0 used `mainrepo' instead of
186 ; `inboxdir', both remain supported after 1.2
188 url = https://example.com/$name/
189 url = http://example.onion/$name/
# these keys are dereferenced as array refs: one output line per value
191 for my $k (qw(address listid infourl watchheader)) {
192 defined(my $v = $ibx->{$k}) or next;
193 $$txt .= "\t$k = $_\n" for @$v;
195 if (my $altid = $ibx->{altid}) {
196 my $altid_map = $ibx->altid_map;
198 ; altid DBs may be used to provide numeric article ID lookup from
199 ; old, pre-existing sources. You can recreate them via curl(1),
200 ; gzip(1), and sqlite3(1) as documented:
# for every altid name (sorted): a commented curl | gzip | sqlite3 recipe
# followed by the matching "altid = serial:..." line
202 for (sort keys %$altid_map) {
203 $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
204 "\t;\tgzip -dc | \\\n" .
205 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
206 $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
# these keys are interpolated directly as single values
210 for my $k (qw(filter newsgroup obfuscate replyto)) {
211 defined(my $v = $ibx->{$k}) or next;
212 $$txt .= "\t$k = $v\n";
214 $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
215 $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
216 _coderepo_config($ctx, $txt);
220 # n.b. this is a perfect candidate for memoization
# Appends an example config snippet for the external index in $ctx->{ibx}
# to the string referenced by $txt and pushes a Content-Disposition header
# (filename extindex.config) onto $hdr.  topdir and the example URLs are
# placeholders; infourl, when defined, is interpolated as a single value,
# and the coderepo sections are added by _coderepo_config().
# NOTE(review): the emitted text reads "public-inbox-config(5)manpage"
# without a space, unlike the inbox variant -- possibly a typo in the
# output; confirm before changing, since it is runtime text.
221 sub extindex_config ($$$) {
222 my ($ctx, $hdr, $txt) = @_;
223 my $ibx = $ctx->{ibx};
224 push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
225 my $name = dq_escape($ibx->{name});
226 my $base_url = $ibx->base_url($ctx->{env});
228 ; Example public-inbox config snippet for the external index (extindex) at:
230 ; See public-inbox-config(5)manpage for more details:
231 ; https://public-inbox.org/public-inbox-config.html
233 topdir = /path/to/extindex-topdir
234 url = https://example.com/$name/
235 url = http://example.onion/$name/
237 for my $k (qw(infourl)) {
238 defined(my $v = $ibx->{$k}) or next;
239 $$txt .= "\t$k = $v\n";
241 _coderepo_config($ctx, $txt);
# Builds the plain-text "Code repositories ..." paragraph for the inbox or
# external index in $ctx->{ibx} and returns it as a list (@ret), which is
# empty when no coderepo entries exist.  Only $ret[0] is written by the
# visible lines: for each coderepo name its configured cgiturl values are
# appended (via prurl()), or a "(no URL configured)" line when there are
# none.
# Arguments: $ctx (request context), $top_url (prefix intended for
# relative repository URLs, per the comment in the loop).
245 sub coderepos_raw ($$) {
246 my ($ctx, $top_url) = @_;
# no coderepo configured: return an empty list
247 my $cr = $ctx->{ibx}->{coderepo} // return ();
248 my $cfg = $ctx->{www}->{pi_cfg};
250 for my $cr_name (@$cr) {
# wording depends on whether the object provides cloneurl()
252 my $thing = $ctx->{ibx}->can('cloneurl') ?
253 'public inbox' : 'external index';
255 Code repositories for project(s) associated with this $thing
258 my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl");
261 # relative or absolute URL?, prefix relative
262 # "foo.git" with appropriate number of "../"
263 my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ :
265 $ret[0] .= "\n\t" . prurl($ctx->{env}, $u);
268 $ret[0] .= qq[\n\t$cr_name.git (no URL configured)];
271 @ret; # may be empty, this sub is called as an arg for join()
# Appends notes about non-HTTP access (IMAP, NNTP, POP3) to the string
# referenced by $txt, using the URL lists returned by imap_url(),
# nntp_url() and pop3_url() of $ctx->{ibx}.  Returns immediately when the
# object has no nntp_url method.
# NOTE(review): the conditions guarding each section are not visible in
# this listing; presumably each is skipped when its URL list is empty.
# NOTE(review): the emitted POP3 text says "where $(uuidgen) in the output"
# -- likely meant "is the output"; it is runtime text, so confirm before
# changing.
274 sub _add_non_http_urls ($$) {
275 my ($ctx, $txt) = @_;
276 $ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP
277 my $urls = $ctx->{ibx}->imap_url($ctx);
279 $$txt .= "\nIMAP subfolder(s) are available under:";
280 $$txt .= "\n " . join("\n ", @$urls);
283 # each subfolder (starting with `0') holds 50K messages at most
286 $urls = $ctx->{ibx}->nntp_url($ctx);
# singular or plural wording depending on the number of NNTP URLs
288 $$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are";
289 $$txt .= ' available over NNTP:';
290 $$txt .= "\n " . join("\n ", @$urls) . "\n";
292 $urls = $ctx->{ibx}->pop3_url($ctx);
# $urls is reused here: the array ref is replaced by the joined string
294 $urls = join("\n ", @$urls);
297 POP3 access is available:
300 The POP3 password is: anonymous
301 The POP3 username is: \$(uuidgen)\@$ctx->{ibx}->{newsgroup}
302 where \$(uuidgen) in the output of the `uuidgen' command on your system.
303 The UUID in the username functions as a private cookie (don't share it).
304 Idle accounts will expire periodically.
# Appends a note about Tor to the string referenced by $txt, but only when
# that string already contains a URL whose host is a single label ending
# in ".onion" (scheme://WORD.onion/, matched case-insensitively).
# NOTE(review): the argument unpacking is not visible in this listing.
309 sub _add_onion_note ($) {
311 $$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <<EOM
313 note: .onion URLs require Tor: https://www.torproject.org/
# Appends the "mirroring instructions" page to the string referenced by
# $txt.  For a regular inbox (object provides cloneurl()) it lists
# "git clone --mirror" commands -- one per existing epoch for v2, or the
# base URL otherwise -- followed by public-inbox-init/-index commands.
# For an external index it explains that each inbox must be mirrored
# individually.  Both variants then add a link to the example config,
# non-HTTP URLs, an onion note when applicable, and the code repository
# list.
318 sub _mirror_help ($$) {
319 my ($ctx, $txt) = @_;
320 my $ibx = $ctx->{ibx};
321 my $base_url = $ibx->base_url($ctx->{env});
322 chop $base_url; # no trailing slash for "git clone"
# last path component of the base URL, used as the local directory name
323 my $dir = (split(m!/!, $base_url))[-1];
# presumably used to skip duplicate clone URLs later (the loop body that
# consults it is not visible in this listing)
324 my %seen = ($base_url => 1);
# parent URL: the base URL with its last path component removed
325 my $top_url = $base_url;
326 $top_url =~ s!/[^/]+\z!/!;
327 $$txt .= "public-inbox mirroring instructions\n\n";
328 if ($ibx->can('cloneurl')) { # PublicInbox::Inbox
330 "This public inbox may be cloned and mirrored by anyone:\n";
332 my $max = $ibx->max_git_epoch;
333 # TODO: some of these URLs may be too long and we may need to
334 # do something like code_footer() above, but these are local
336 if (defined($max)) { # v2
337 for my $i (0..$max) {
338 # old epochs may be deleted:
339 -d "$ibx->{inboxdir}/git/$i.git" or next;
340 my $url = "$base_url/$i";
342 push @urls, "$url $dir/git/$i.git";
344 my $nr = scalar(@urls);
346 chomp($$txt .= <<EOM);
348 # this inbox consists of $nr epochs: (no need to clone all of them)
350 $urls[0] .= " # oldest";
351 $urls[-1] .= " # newest";
354 push @urls, $base_url;
356 # FIXME: epoch splits can be different in other repositories,
357 # use the "cloneurl" file as-is for now:
358 for my $u (@{$ibx->cloneurl}) {
363 $$txt .= join('', map { " git clone --mirror $_\n" } @urls);
# fall back to a placeholder address when the inbox has none configured
364 my $addrs = $ibx->{address} // 'inbox@example.com';
365 my $ng = $ibx->{newsgroup} // '';
# 4-argument substr inserts the option text in front of the newsgroup name
366 substr($ng, 0, 0, ' --ng ') if $ng;
367 $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
# a defined max epoch means the v2 layout
368 my $v = defined $max ? '-V2' : '-V1';
371 # If you have public-inbox 1.1+ installed, you may
372 # initialize and index your mirror using the following commands:
373 public-inbox-init $v$ng \\
374 $ibx->{name} ./$dir $base_url \\
376 public-inbox-index ./$dir
378 } else { # PublicInbox::ExtSearch
380 This is an external index which is an amalgamation of several public inboxes.
381 Each public inbox needs to be mirrored individually.
383 my $v = $ctx->{www}->{pi_cfg}->{lc('publicInbox.wwwListing')};
384 if (($v // '') =~ /\A(?:all|match=domain)\z/) {
386 A list of them is available at $top_url
390 my $cfg_link = "$base_url/_/text/config/raw";
393 Example config snippet for mirrors: $cfg_link
395 _add_non_http_urls($ctx, $txt);
396 _add_onion_note($txt);
398 my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL);
399 $$txt .= join("\n\n",
400 coderepos_raw($ctx, $top_url), # may be empty
401 "AGPL code for this site:\n git clone $code_url");
# Dispatches on $key to produce the built-in text pages, appending to the
# string referenced by $txt: 'mirror' -> _mirror_help(), 'color' ->
# _colors_help(), 'config' -> inbox_config() or extindex_config()
# depending on whether $ctx->{ibx} provides cloneurl(), and 'help' -> the
# help text assembled in the remainder of this sub.  Any other key returns
# nothing (bare return), which the caller treats as "not found".
# Arguments: $ctx (request context), $key (page name), $hdr (header array,
# passed through to the config generators), $txt (scalar reference).
405 sub _default_text ($$$$) {
406 my ($ctx, $key, $hdr, $txt) = @_;
407 if ($key eq 'mirror') {
408 return _mirror_help($ctx, $txt);
409 } elsif ($key eq 'color') {
410 return _colors_help($ctx, $txt);
411 } elsif ($key eq 'config') {
412 return $ctx->{ibx}->can('cloneurl') ?
413 inbox_config($ctx, $hdr, $txt) :
414 extindex_config($ctx, $hdr, $txt);
416 return if $key ne 'help'; # TODO more keys?
418 my $ibx = $ctx->{ibx};
419 my $base_url = $ibx->base_url($ctx->{env});
421 public-inbox help for $base_url
426 public-inbox uses Message-ID identifiers in URLs.
427 One may look up messages by substituting Message-IDs
428 (without the leading '<' or trailing '>') into the URL.
429 Forward slash ('/') characters in the Message-IDs
430 need to be escaped as "%2F" (without quotes).
432 Thus, it is possible to retrieve any message by its
433 Message-ID by going to:
435 $base_url<Message-ID>/
436 (without the '<' or '>')
438 Message-IDs are described at:
444 # n.b. we use the Xapian DB for any regeneratable,
445 # order-of-arrival-independent data.
451 This public-inbox has search functionality provided by Xapian.
453 It supports typical AND, OR, NOT, '+', '-' queries present
454 in other search engines.
456 We also support search prefixes to limit the scope of the
457 search to certain fields.
459 Prefixes supported in this installation include:
462 _srch_prefix($ibx, $txt);
465 Most prefixes are probabilistic, meaning they support stemming
466 and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes
467 do not support stemming or wildcards.
468 The upstream Xapian query parser documentation fully explains
480 Message threading is enabled for this public-inbox,
481 additional endpoints for message threads are available:
483 * $base_url<Message-ID>/T/#u
485 Loads the thread belonging to the given <Message-ID>
486 in flat chronological order. The "#u" anchor
487 focuses the browser on the given <Message-ID>.
489 * $base_url<Message-ID>/t/#u
491 Loads the thread belonging to the given <Message-ID>
492 in threaded order with nesting. For deep threads,
493 this requires a wide display or horizontal scrolling.
495 Both of these HTML endpoints are suitable for offline reading
496 using the thread overview at the bottom of each page.
498 The gzipped mbox for a thread is available for downloading and
499 importing into your favorite mail client:
501 * $base_url<Message-ID>/t.mbox.gz
503 We use the mboxrd variant of the mbox format described at:
507 Users of feed readers may follow a particular thread using:
509 * $base_url<Message-ID>/t.atom
511 Which loads the thread in Atom Syndication Standard
512 described at Wikipedia and RFC4287:
514 $WIKI_URL/Atom_(standard)
515 https://tools.ietf.org/html/rfc4287
517 Atom Threading Extensions (RFC4685) are supported:
519 https://tools.ietf.org/html/rfc4685
524 _add_non_http_urls($ctx, \(my $note = ''));
# the non-HTTP URL notes were collected into a scratch string above; only
# when it is non-empty is it indented and a further section appended
525 $note and $note =~ s/^/ /gms and $$txt .= <<EOF;
534 This help text is maintained by public-inbox developers
535 reachable via plain-text email at: meta\@public-inbox.org
536 Their inbox is archived at: https://public-inbox.org/meta/
538 # TODO: support admin contact info in ~/.public-inbox/config