1 # Copyright (C) all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # used for displaying help texts and other non-mail content
5 package PublicInbox::WwwText;
8 use PublicInbox::Linkify;
9 use PublicInbox::WwwStream;
10 use PublicInbox::Hval qw(ascii_html prurl);
11 use HTTP::Date qw(time2str);
12 use URI::Escape qw(uri_escape_utf8);
13 use PublicInbox::GzipFilter qw(gzf_maybe);
14 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
15 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
17 require PublicInbox::HlMod;
18 PublicInbox::HlMod->new
21 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
22 # Supported $KEY values: "help" (the default), "mirror", "color" and "config"
27 $key //= 'help'; # this 302s to _/text/help/
29 # get the raw text the same way we get mboxrds
30 my $raw = ($key =~ s!/raw\z!!);
31 my $have_tslash = ($key =~ s!/\z!!) if !$raw;
34 my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
35 if (!_default_text($ctx, $key, $hdr, \$txt)) {
37 $txt = "404 Not Found ($key)\n";
39 my $env = $ctx->{env};
41 $txt = gzf_maybe($hdr, $env)->zflush($txt) if $code == 200;
42 $hdr->[3] = length($txt);
43 return [ $code, $hdr, [ $txt ] ]
46 # enforce trailing slash for "wget -r" compatibility
47 if (!$have_tslash && $code == 200) {
48 my $url = $ctx->{ibx}->base_url($env);
49 $url .= "_/text/$key/";
51 return [ 302, [ 'Content-Type', 'text/plain',
53 [ "Redirecting to $url\n" ] ];
56 # Follow git commit message conventions,
57 # first line is the Subject/title
58 my ($title) = ($txt =~ /\A([^\n]*)/s);
59 $ctx->{-title_html} = ascii_html($title);
60 my $nslash = ($key =~ tr!/!/!);
61 $ctx->{-upfx} = '../../../' . ('../' x $nslash);
62 my $l = PublicInbox::Linkify->new;
65 $hl->do_hl_text(\$txt);
67 $txt = ascii_html($txt);
69 $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
70 $txt =~ s!^search$!<a\nid=search>search</a>!sm;
71 $txt =~ s!\bPOP3\b!<a\nid=pop3>POP3</a>!;
72 $txt =~ s!\b(Newsgroups?)\b!<a\nid=nntp>$1</a>!;
73 $txt =~ s!\bIMAP\b!<a\nid=imap>IMAP</a>!;
74 PublicInbox::WwwStream::html_oneshot($ctx, $code, $txt);
77 sub _srch_prefix ($$) {
81 my $help = $ibx->isrch->help;
83 for ($i = 0; $i < @$help; $i += 2) {
84 my $pfx = $help->[$i];
86 $pad = $n if $n > $pad;
88 $htxt .= $help->[$i + 1];
92 my $padding = ' ' x ($pad + 4);
93 $htxt =~ s/^/$padding/gms;
94 $htxt =~ s/^$padding(\S+)\0/" $1".(' ' x ($pad - length($1)))/egms;
95 $htxt =~ s/\f\n/\n/gs;
100 sub _colors_help ($$) {
101 my ($ctx, $txt) = @_;
102 my $ibx = $ctx->{ibx};
103 my $env = $ctx->{env};
104 my $base_url = $ibx->base_url($env);
105 $$txt .= "color customization for $base_url\n";
108 public-inbox provides a stable set of CSS classes for users to
109 customize colors for highlighting diffs and code.
111 Users of browsers such as dillo, Firefox, or some browser
112 extensions may start by downloading the following sample CSS file
113 to control the colors they see:
115 ${base_url}userContent.css
121 $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
124 # git-config section names are quoted in the config file, so escape them
127 $name =~ s/\\/\\\\/g;
132 sub _coderepo_config ($$) {
133 my ($ctx, $txt) = @_;
134 my $cr = $ctx->{ibx}->{coderepo} // return;
135 # note: this doesn't preserve cgitrc layout, since we parse cgitrc
136 # and drop the original structure
137 $$txt .= "\tcoderepo = $_\n" for @$cr;
140 ; `coderepo' entries allows blob reconstruction via patch emails if
141 ; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@'
142 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
143 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
145 my $pi_cfg = $ctx->{www}->{pi_cfg};
146 for my $cr_name (@$cr) {
147 my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
148 my $path = "/path/to/$cr_name";
149 $cr_name = dq_escape($cr_name);
151 $$txt .= qq([coderepo "$cr_name"]\n);
152 if ($urls && scalar(@$urls)) {
154 $$txt .= join(" ||\n\t;\t", map {;
156 if ($path !~ m![a-z0-9_/\.\-]!i) {
157 $dst = '"'.dq_escape($dst).'"';
159 qq(git clone $_ $dst);
163 $$txt .= "\tdir = $path\n";
164 $$txt .= "\tcgiturl = https://example.com/";
165 $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
169 # n.b. this is a perfect candidate for memoization
170 sub inbox_config ($$$) {
171 my ($ctx, $hdr, $txt) = @_;
172 my $ibx = $ctx->{ibx};
173 push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
174 my $t = eval { $ibx->mm->created_at };
175 push(@$hdr, 'Last-Modified', time2str($t)) if $t;
176 my $name = dq_escape($ibx->{name});
177 my $inboxdir = '/path/to/top-level-inbox';
178 my $base_url = $ibx->base_url($ctx->{env});
180 ; Example public-inbox config snippet for a mirror of
182 ; See public-inbox-config(5) manpage for more details:
183 ; https://public-inbox.org/public-inbox-config.html
184 [publicinbox "$name"]
186 ; note: public-inbox before v1.2.0 used `mainrepo' instead of
187 ; `inboxdir', both remain supported after 1.2
189 url = https://example.com/$name/
190 url = http://example.onion/$name/
192 for my $k (qw(address listid infourl watchheader)) {
193 defined(my $v = $ibx->{$k}) or next;
194 $$txt .= "\t$k = $_\n" for @$v;
196 if (my $altid = $ibx->{altid}) {
197 my $altid_map = $ibx->altid_map;
199 ; altid DBs may be used to provide numeric article ID lookup from
200 ; old, pre-existing sources. You can recreate them via curl(1),
201 ; gzip(1), and sqlite3(1) as documented:
203 for (sort keys %$altid_map) {
204 $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
205 "\t;\tgzip -dc | \\\n" .
206 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
207 $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
211 for my $k (qw(filter newsgroup obfuscate replyto)) {
212 defined(my $v = $ibx->{$k}) or next;
213 $$txt .= "\t$k = $v\n";
215 $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
216 $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
217 _coderepo_config($ctx, $txt);
221 # n.b. this is a perfect candidate for memoization
222 sub extindex_config ($$$) {
223 my ($ctx, $hdr, $txt) = @_;
224 my $ibx = $ctx->{ibx};
225 push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
226 my $name = dq_escape($ibx->{name});
227 my $base_url = $ibx->base_url($ctx->{env});
229 ; Example public-inbox config snippet for the external index (extindex) at:
231 ; See public-inbox-config(5)manpage for more details:
232 ; https://public-inbox.org/public-inbox-config.html
234 topdir = /path/to/extindex-topdir
235 url = https://example.com/$name/
236 url = http://example.onion/$name/
238 for my $k (qw(infourl)) {
239 defined(my $v = $ibx->{$k}) or next;
240 $$txt .= "\t$k = $v\n";
242 _coderepo_config($ctx, $txt);
246 sub coderepos_raw ($$) {
247 my ($ctx, $top_url) = @_;
248 my $cr = $ctx->{ibx}->{coderepo} // return ();
249 my $cfg = $ctx->{www}->{pi_cfg};
250 my @ret = ('Code repositories for project(s) associated with this '.
251 $ctx->{ibx}->thing_type . "\n");
252 for my $cr_name (@$cr) {
253 my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl");
256 my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ :
258 $ret[0] .= "\n\t" . prurl($ctx->{env}, $u);
261 $ret[0] .= qq[\n\t$cr_name.git (no URL configured)];
264 @ret; # may be empty, this sub is called as an arg for join()
267 sub _add_non_http_urls ($$) {
268 my ($ctx, $txt) = @_;
269 $ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP
270 my $urls = $ctx->{ibx}->imap_url($ctx);
272 $urls = join("\n ", @$urls);
273 $urls =~ s!://([^/@]+)/!://;AUTH=ANONYMOUS\@$1/!sg;
276 IMAP subfolder(s) are available under:
278 # each subfolder (starting with `0') holds 50K messages at most
281 $urls = $ctx->{ibx}->nntp_url($ctx);
283 $$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are";
284 $$txt .= ' available over NNTP:';
285 $$txt .= "\n " . join("\n ", @$urls) . "\n";
287 $urls = $ctx->{ibx}->pop3_url($ctx);
289 $urls = join("\n ", @$urls);
292 POP3 access is available:
295 The POP3 password is: anonymous
296 The POP3 username is: \$(uuidgen)\@$ctx->{ibx}->{newsgroup}
297 where \$(uuidgen) in the output of the `uuidgen' command on your system.
298 The UUID in the username functions as a private cookie (don't share it).
299 Idle accounts will expire periodically.
304 sub _add_onion_note ($) {
306 $$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <<EOM
308 note: .onion URLs require Tor: https://www.torproject.org/
313 sub _mirror_help ($$) {
314 my ($ctx, $txt) = @_;
315 my $ibx = $ctx->{ibx};
316 my $base_url = $ibx->base_url($ctx->{env});
317 chop $base_url; # no trailing slash for "git clone"
318 my $dir = (split(m!/!, $base_url))[-1];
319 my %seen = ($base_url => 1);
320 my $top_url = $base_url;
321 $top_url =~ s!/[^/]+\z!/!;
322 $$txt .= "public-inbox mirroring instructions\n\n";
323 if ($ibx->can('cloneurl')) { # PublicInbox::Inbox
325 "This public inbox may be cloned and mirrored by anyone:\n";
327 my $max = $ibx->max_git_epoch;
328 # TODO: some of these URLs may be too long and we may need to
329 # do something like code_footer() above, but these are local
331 if (defined($max)) { # v2
332 for my $i (0..$max) {
333 # old epochs may be deleted:
334 -d "$ibx->{inboxdir}/git/$i.git" or next;
335 my $url = "$base_url/$i";
337 push @urls, "$url $dir/git/$i.git";
339 my $nr = scalar(@urls);
341 chomp($$txt .= <<EOM);
343 # this inbox consists of $nr epochs: (no need to clone all of them)
345 $urls[0] .= " # oldest";
346 $urls[-1] .= " # newest";
349 push @urls, $base_url;
351 # FIXME: epoch splits can be different in other repositories,
352 # use the "cloneurl" file as-is for now:
353 for my $u (@{$ibx->cloneurl}) {
358 $$txt .= join('', map { " git clone --mirror $_\n" } @urls);
359 my $addrs = $ibx->{address} // 'inbox@example.com';
360 my $ng = $ibx->{newsgroup} // '';
361 substr($ng, 0, 0, ' --ng ') if $ng;
362 $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
363 my $v = defined $max ? '-V2' : '-V1';
366 # If you have public-inbox 1.1+ installed, you may
367 # initialize and index your mirror using the following commands:
368 public-inbox-init $v$ng \\
369 $ibx->{name} ./$dir $base_url \\
371 public-inbox-index ./$dir
373 } else { # PublicInbox::ExtSearch
375 This is an external index which is an amalgamation of several public inboxes.
376 Each public inbox needs to be mirrored individually.
378 my $v = $ctx->{www}->{pi_cfg}->{lc('publicInbox.wwwListing')};
379 if (($v // '') =~ /\A(?:all|match=domain)\z/) {
381 A list of them is available at $top_url
385 my $cfg_link = "$base_url/_/text/config/raw";
388 Example config snippet for mirrors: $cfg_link
390 _add_non_http_urls($ctx, $txt);
391 _add_onion_note($txt);
393 my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL);
394 $$txt .= join("\n\n",
395 coderepos_raw($ctx, $top_url), # may be empty
396 "AGPL code for this site:\n git clone $code_url");
400 sub _default_text ($$$$) {
401 my ($ctx, $key, $hdr, $txt) = @_;
402 if ($key eq 'mirror') {
403 return _mirror_help($ctx, $txt);
404 } elsif ($key eq 'color') {
405 return _colors_help($ctx, $txt);
406 } elsif ($key eq 'config') {
407 return $ctx->{ibx}->can('cloneurl') ?
408 inbox_config($ctx, $hdr, $txt) :
409 extindex_config($ctx, $hdr, $txt);
411 return if $key ne 'help'; # TODO more keys?
413 my $ibx = $ctx->{ibx};
414 my $base_url = $ibx->base_url($ctx->{env});
416 public-inbox help for $base_url
421 public-inbox uses Message-ID identifiers in URLs.
422 One may look up messages by substituting Message-IDs
423 (without the leading '<' or trailing '>') into the URL.
424 Forward slash ('/') characters in the Message-IDs
425 need to be escaped as "%2F" (without quotes).
427 Thus, it is possible to retrieve any message by its
428 Message-ID by going to:
430 $base_url<Message-ID>/
431 (without the '<' or '>')
433 Message-IDs are described at:
439 # n.b. we use the Xapian DB for any regeneratable,
440 # order-of-arrival-independent data.
446 This public-inbox has search functionality provided by Xapian.
448 It supports typical AND, OR, NOT, '+', '-' queries present
449 in other search engines.
451 We also support search prefixes to limit the scope of the
452 search to certain fields.
454 Prefixes supported in this installation include:
457 _srch_prefix($ibx, $txt);
460 Most prefixes are probabilistic, meaning they support stemming
461 and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes
462 do not support stemming or wildcards.
463 The upstream Xapian query parser documentation fully explains
475 Message threading is enabled for this public-inbox,
476 additional endpoints for message threads are available:
478 * $base_url<Message-ID>/T/#u
480 Loads the thread belonging to the given <Message-ID>
481 in flat chronological order. The "#u" anchor
482 focuses the browser on the given <Message-ID>.
484 * $base_url<Message-ID>/t/#u
486 Loads the thread belonging to the given <Message-ID>
487 in threaded order with nesting. For deep threads,
488 this requires a wide display or horizontal scrolling.
490 Both of these HTML endpoints are suitable for offline reading
491 using the thread overview at the bottom of each page.
493 The gzipped mbox for a thread is available for downloading and
494 importing into your favorite mail client:
496 * $base_url<Message-ID>/t.mbox.gz
498 We use the mboxrd variant of the mbox format described at:
502 Users of feed readers may follow a particular thread using:
504 * $base_url<Message-ID>/t.atom
506 Which loads the thread in Atom Syndication Standard
507 described at Wikipedia and RFC4287:
509 $WIKI_URL/Atom_(standard)
510 https://tools.ietf.org/html/rfc4287
512 Atom Threading Extensions (RFC4685) are supported:
514 https://tools.ietf.org/html/rfc4685
519 _add_non_http_urls($ctx, \(my $note = ''));
520 $note and $note =~ s/^/ /gms and $$txt .= <<EOF;
529 This help text is maintained by public-inbox developers
530 reachable via plain-text email at: meta\@public-inbox.org
531 Their inbox is archived at: https://public-inbox.org/meta/
533 # TODO: support admin contact info in ~/.public-inbox/config