1 # Copyright (C) all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # used for displaying help texts and other non-mail content
5 package PublicInbox::WwwText;
8 use PublicInbox::Linkify;
9 use PublicInbox::WwwStream;
10 use PublicInbox::Hval qw(ascii_html prurl);
11 use HTTP::Date qw(time2str);
12 use URI::Escape qw(uri_escape_utf8);
13 use PublicInbox::GzipFilter qw(gzf_maybe);
14 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
15 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
17 require PublicInbox::HlMod;
18 PublicInbox::HlMod->new
21 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
22 # For now, "help" is the only supported $KEY
27 $key //= 'help'; # this 302s to _/text/help/
29 # get the raw text the same way we get mboxrds
30 my $raw = ($key =~ s!/raw\z!!);
31 my $have_tslash = ($key =~ s!/\z!!) if !$raw;
34 if (!_default_text($ctx, $key, \$txt)) {
36 $txt = "404 Not Found ($key)\n";
38 my $env = $ctx->{env};
40 my $h = delete $ctx->{-res_hdr};
41 $txt = gzf_maybe($h, $env)->zflush($txt) if $code == 200;
42 push @$h, 'Content-Type', 'text/plain',
43 'Content-Length', length($txt);
44 return [ $code, $h, [ $txt ] ]
47 # enforce trailing slash for "wget -r" compatibility
48 if (!$have_tslash && $code == 200) {
49 my $url = $ctx->{ibx}->base_url($env);
50 $url .= "_/text/$key/";
52 return [ 302, [ 'Content-Type', 'text/plain',
54 [ "Redirecting to $url\n" ] ];
57 # Follow git commit message conventions,
58 # first line is the Subject/title
59 my ($title) = ($txt =~ /\A([^\n]*)/s);
60 $ctx->{-title_html} = ascii_html($title);
61 my $nslash = ($key =~ tr!/!/!);
62 $ctx->{-upfx} = '../../../' . ('../' x $nslash);
63 my $l = PublicInbox::Linkify->new;
66 $hl->do_hl_text(\$txt);
68 $txt = ascii_html($txt);
70 $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
71 $txt =~ s!^search$!<a\nid=search>search</a>!sm;
72 $txt =~ s!\bPOP3\b!<a\nid=pop3>POP3</a>!;
73 $txt =~ s!\b(Newsgroups?)\b!<a\nid=nntp>$1</a>!;
74 $txt =~ s!\bIMAP\b!<a\nid=imap>IMAP</a>!;
75 PublicInbox::WwwStream::html_oneshot($ctx, $code, $txt);
# Builds formatted search-prefix help text in $htxt.
# The list returned by $ibx->isrch->help is walked two elements at a
# time: $help->[$i] is read as the prefix and $help->[$i + 1] is
# appended to $htxt.
# NOTE(review): several lines of this sub are not visible here (argument
# unpacking, how $n is derived, what is finally done with $htxt), so the
# notes below cover only the visible statements.
78 sub _srch_prefix ($$) {
82 my $help = $ibx->isrch->help;
84 for ($i = 0; $i < @$help; $i += 2) {
85 my $pfx = $help->[$i];
# $pad ends up as the largest $n seen (presumably the prefix width --
# TODO confirm against the line that sets $n)
87 $pad = $n if $n > $pad;
89 $htxt .= $help->[$i + 1];
# indent every line of the accumulated text by $pad + 4 spaces
93 my $padding = ' ' x ($pad + 4);
94 $htxt =~ s/^/$padding/gms;
# a line starting with the padding, a non-space token and a NUL byte is
# rewritten so the token is followed by ($pad - length) spaces instead
95 $htxt =~ s/^$padding(\S+)\0/" $1".(' ' x ($pad - length($1)))/egms;
# drop every form feed which directly precedes a newline
96 $htxt =~ s/\f\n/\n/gs;
# Appends the color-customization help to the string referenced by $txt.
#   $ctx - request context; $ctx->{ibx} and $ctx->{env} are read here
#   $txt - scalar ref which the help text is appended to
# The text starts with a title line naming the inbox base URL, points
# the reader at ${base_url}userContent.css, and ends with the output of
# PublicInbox::UserContent::sample followed by a "```" line.
101 sub _colors_help ($$) {
102 my ($ctx, $txt) = @_;
103 my $ibx = $ctx->{ibx};
104 my $env = $ctx->{env};
105 my $base_url = $ibx->base_url($env);
# first line doubles as the title
106 $$txt .= "color customization for $base_url\n";
109 public-inbox provides a stable set of CSS classes for users to
110 customize colors for highlighting diffs and code.
112 Users of browsers such as dillo, Firefox, or some browser
113 extensions may start by downloading the following sample CSS file
114 to control the colors they see:
116 ${base_url}userContent.css
122 $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
125 # git-config section names are quoted in the config file, so escape them
128 $name =~ s/\\/\\\\/g;
# Appends the coderepo-related parts of an example config to $$txt.
#   $ctx - request context; reads $ctx->{ibx}->{coderepo} (array ref of
#          coderepo names) and $ctx->{www}->{pi_cfg}
#   $txt - scalar ref which the config text is appended to
# Returns early (via `// return') when the inbox has no coderepo entry.
# Otherwise one "coderepo = NAME" line is emitted per name, followed by
# a [coderepo "NAME"] section for each name.
133 sub _coderepo_config ($$) {
134 my ($ctx, $txt) = @_;
135 my $cr = $ctx->{ibx}->{coderepo} // return;
136 # note: this doesn't preserve cgitrc layout, since we parse cgitrc
137 # and drop the original structure
138 $$txt .= "\tcoderepo = $_\n" for @$cr;
141 ; `coderepo' entries allows blob reconstruction via patch emails if
142 ; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@'
143 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
144 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
146 my $pi_cfg = $ctx->{www}->{pi_cfg};
147 for my $cr_name (@$cr) {
# the cgiturl lookup and the example path both use the unescaped name
148 my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
149 my $path = "/path/to/$cr_name";
# NOTE(review): $cr_name aliases the current element of @$cr, so this
# assignment also rewrites the name stored in $ctx->{ibx}->{coderepo};
# confirm that repeated calls cannot escape the same name twice
150 $cr_name = dq_escape($cr_name);
152 $$txt .= qq([coderepo "$cr_name"]\n);
153 if ($urls && scalar(@$urls)) {
# one "git clone URL DST" suggestion per configured URL, joined by " ||"
155 $$txt .= join(" ||\n\t;\t", map {;
# NOTE(review): this is true only when $path contains none of the listed
# characters; $path is set to "/path/to/..." above, so unless it changes
# on a line not shown here the quoting branch looks unreachable --
# confirm whether a negated character class was intended
157 if ($path !~ m![a-z0-9_/\.\-]!i) {
158 $dst = '"'.dq_escape($dst).'"';
160 qq(git clone $_ $dst);
# example dir and cgiturl lines; the cgiturl path is the (already
# dq_escape'd) name passed through uri_escape_utf8
164 $$txt .= "\tdir = $path\n";
165 $$txt .= "\tcgiturl = https://example.com/";
166 $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
170 # n.b. this is a perfect candidate for memoization
# Appends an example [publicinbox "NAME"] config snippet for mirroring
# this inbox to $$txt and queues response headers on $ctx->{-res_hdr}.
#   $ctx - request context; reads {ibx} and {env}, pushes to {-res_hdr}
#   $txt - scalar ref which the config text is appended to
# A Content-Disposition header (filename=inbox.config) is always queued;
# Last-Modified is queued only when $ibx->mm->created_at yields a true
# value (any exception from that call is swallowed by the eval).
171 sub inbox_config ($$) {
172 my ($ctx, $txt) = @_;
173 my $ibx = $ctx->{ibx};
174 push @{$ctx->{-res_hdr}},
175 'Content-Disposition', 'inline; filename=inbox.config';
# best-effort timestamp: $t stays undef if the call dies
176 my $t = eval { $ibx->mm->created_at };
177 push(@{$ctx->{-res_hdr}}, 'Last-Modified', time2str($t)) if $t;
# the name is escaped because it goes inside a quoted section header
178 my $name = dq_escape($ibx->{name});
179 my $inboxdir = '/path/to/top-level-inbox';
180 my $base_url = $ibx->base_url($ctx->{env});
182 ; Example public-inbox config snippet for a mirror of
184 ; See public-inbox-config(5) manpage for more details:
185 ; https://public-inbox.org/public-inbox-config.html
186 [publicinbox "$name"]
188 ; note: public-inbox before v1.2.0 used `mainrepo' instead of
189 ; `inboxdir', both remain supported after 1.2
191 url = https://example.com/$name/
192 url = http://example.onion/$name/
# these keys are dereferenced as arrays: one "key = value" line per
# element, keys which are not defined are skipped
194 for my $k (qw(address listid infourl watchheader)) {
195 defined(my $v = $ibx->{$k}) or next;
196 $$txt .= "\t$k = $_\n" for @$v;
198 if (my $altid = $ibx->{altid}) {
199 my $altid_map = $ibx->altid_map;
201 ; altid DBs may be used to provide numeric article ID lookup from
202 ; old, pre-existing sources. You can recreate them via curl(1),
203 ; gzip(1), and sqlite3(1) as documented:
# per altid (sorted by key): a commented-out download recipe followed by
# the matching "altid = serial:..." line
205 for (sort keys %$altid_map) {
206 $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
207 "\t;\tgzip -dc | \\\n" .
208 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
209 $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
# these keys are emitted as plain scalars, one line each when defined
213 for my $k (qw(filter newsgroup obfuscate replyto)) {
214 defined(my $v = $ibx->{$k}) or next;
215 $$txt .= "\t$k = $v\n";
# one mirror line per URL returned by imap_url / nntp_url
217 $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
218 $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
219 _coderepo_config($ctx, $txt);
223 # n.b. this is a perfect candidate for memoization
# Appends an example config snippet for the external index (extindex)
# to $$txt and queues a Content-Disposition header
# (filename=extindex.config) on $ctx->{-res_hdr}.
#   $ctx - request context; reads {ibx} and {env}, pushes to {-res_hdr}
#   $txt - scalar ref which the config text is appended to
224 sub extindex_config ($$) {
225 my ($ctx, $txt) = @_;
226 my $ibx = $ctx->{ibx};
227 push @{$ctx->{-res_hdr}},
228 'Content-Disposition', 'inline; filename=extindex.config';
# the name is passed through dq_escape before being interpolated below
229 my $name = dq_escape($ibx->{name});
230 my $base_url = $ibx->base_url($ctx->{env});
232 ; Example public-inbox config snippet for the external index (extindex) at:
234 ; See public-inbox-config(5)manpage for more details:
235 ; https://public-inbox.org/public-inbox-config.html
237 topdir = /path/to/extindex-topdir
238 url = https://example.com/$name/
239 url = http://example.onion/$name/
# infourl is emitted as a plain scalar here, and only when defined
241 for my $k (qw(infourl)) {
242 defined(my $v = $ibx->{$k}) or next;
243 $$txt .= "\t$k = $v\n";
245 _coderepo_config($ctx, $txt);
# Returns a list holding one plain-text paragraph about the code
# repositories associated with $ctx->{ibx}, or an empty list when the
# inbox has no {coderepo} entry.
#   $ctx     - request context; reads {ibx}, {env} and {www}->{pi_cfg}
#   $top_url - passed by the caller; its use is on lines not visible
#              here (presumably to absolutize relative cgiturl values --
#              TODO confirm)
# The paragraph is built up in $ret[0]: a heading line, then one
# tab-indented line per configured URL or a "(no URL configured)" line.
249 sub coderepos_raw ($$) {
250 my ($ctx, $top_url) = @_;
251 my $cr = $ctx->{ibx}->{coderepo} // return ();
252 my $cfg = $ctx->{www}->{pi_cfg};
253 my @ret = ('Code repositories for project(s) associated with this '.
254 $ctx->{ibx}->thing_type . "\n");
255 for my $cr_name (@$cr) {
256 my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl");
# values which start with "//" or "scheme://" are used as-is; the
# alternative branch of this ternary is not visible here
259 my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ :
261 $ret[0] .= "\n\t" . prurl($ctx->{env}, $u);
264 $ret[0] .= qq[\n\t$cr_name.git (no URL configured)];
267 @ret; # may be empty, this sub is called as an arg for join()
# Appends notes about IMAP, NNTP and POP3 access to $$txt, using the
# URL lists returned by the imap_url, nntp_url and pop3_url methods of
# $ctx->{ibx}.
#   $ctx - request context; reads {ibx}
#   $txt - scalar ref which the notes are appended to
# Returns immediately when $ctx->{ibx} has no nntp_url method.
# NOTE(review): the conditions guarding each section are on lines not
# visible here; presumably each section is skipped when its URL list
# is empty -- confirm.
270 sub _add_non_http_urls ($$) {
271 my ($ctx, $txt) = @_;
272 $ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP
273 my $urls = $ctx->{ibx}->imap_url($ctx);
275 $urls = join("\n ", @$urls);
# insert ";AUTH=ANONYMOUS@" in front of every authority part which does
# not already contain an "@"
276 $urls =~ s!://([^/@]+)/!://;AUTH=ANONYMOUS\@$1/!sg;
279 IMAP subfolder(s) are available under:
281 # each subfolder (starting with `0') holds 50K messages at most
# $urls is reused for the NNTP list from here on
284 $urls = $ctx->{ibx}->nntp_url($ctx);
286 $$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are";
287 $$txt .= ' available over NNTP:';
288 $$txt .= "\n " . join("\n ", @$urls) . "\n";
# ... and again for the POP3 list
290 $urls = $ctx->{ibx}->pop3_url($ctx);
292 $urls = join("\n ", @$urls);
295 POP3 access is available:
298 The POP3 password is: anonymous
299 The POP3 username is: \$(uuidgen)\@$ctx->{ibx}->{newsgroup}
300 where \$(uuidgen) in the output of the `uuidgen' command on your system.
301 The UUID in the username functions as a private cookie (don't share it).
302 Idle accounts will expire periodically.
# Appends a note about Tor to $$txt when the text accumulated so far
# contains a URL of the form scheme://WORD.onion/ (the match is
# case-insensitive); the text is left untouched otherwise.
307 sub _add_onion_note ($) {
309 $$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <<EOM
311 note: .onion URLs require Tor: https://www.torproject.org/
# Appends plain-text mirroring instructions to $$txt.
#   $ctx - request context; reads {ibx}, {env} and {www}->{pi_cfg}
#   $txt - scalar ref which the instructions are appended to
# For objects which have a cloneurl method (PublicInbox::Inbox) this
# lists "git clone --mirror" commands and example public-inbox-init /
# public-inbox-index invocations; otherwise (PublicInbox::ExtSearch) it
# explains that each inbox must be mirrored individually.  Both cases
# then get a link to the example config, the non-HTTP access notes, the
# .onion note, the coderepo list and the AGPL source URL.
316 sub _mirror_help ($$) {
317 my ($ctx, $txt) = @_;
318 my $ibx = $ctx->{ibx};
319 my $base_url = $ibx->base_url($ctx->{env});
320 chop $base_url; # no trailing slash for "git clone"
# last path component of the URL, used as the local directory name
321 my $dir = (split(m!/!, $base_url))[-1];
322 my %seen = ($base_url => 1);
# $top_url is $base_url with its last path component removed
323 my $top_url = $base_url;
324 $top_url =~ s!/[^/]+\z!/!;
325 $$txt .= "public-inbox mirroring instructions\n\n";
326 if ($ibx->can('cloneurl')) { # PublicInbox::Inbox
328 "This public inbox may be cloned and mirrored by anyone:\n";
330 my $max = $ibx->max_git_epoch;
331 # TODO: some of these URLs may be too long and we may need to
332 # do something like code_footer() above, but these are local
334 if (defined($max)) { # v2
335 for my $i (0..$max) {
336 # old epochs may be deleted:
337 -d "$ibx->{inboxdir}/git/$i.git" or next;
338 my $url = "$base_url/$i";
340 push @urls, "$url $dir/git/$i.git";
342 my $nr = scalar(@urls);
344 chomp($$txt .= <<EOM);
346 # this inbox consists of $nr epochs: (no need to clone all of them)
348 $urls[0] .= " # oldest";
349 $urls[-1] .= " # newest";
352 push @urls, $base_url;
354 # FIXME: epoch splits can be different in other repositories,
355 # use the "cloneurl" file as-is for now:
356 for my $u (@{$ibx->cloneurl}) {
361 $$txt .= join('', map { " git clone --mirror $_\n" } @urls);
# example address(es) and optional "--ng NEWSGROUP" switch for the
# public-inbox-init command shown below
362 my $addrs = $ibx->{address} // 'inbox@example.com';
363 my $ng = $ibx->{newsgroup} // '';
364 substr($ng, 0, 0, ' --ng ') if $ng;
365 $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
366 my $v = defined $max ? '-V2' : '-V1';
369 # If you have public-inbox 1.1+ installed, you may
370 # initialize and index your mirror using the following commands:
371 public-inbox-init $v$ng \\
372 $ibx->{name} ./$dir $base_url \\
374 public-inbox-index ./$dir
376 } else { # PublicInbox::ExtSearch
378 This is an external index which is an amalgamation of several public inboxes.
379 Each public inbox needs to be mirrored individually.
# config keys are looked up lower-cased
381 my $v = $ctx->{www}->{pi_cfg}->{lc('publicInbox.wwwListing')};
382 if (($v // '') =~ /\A(?:all|match=domain)\z/) {
384 A list of them is available at $top_url
388 my $cfg_link = "$base_url/_/text/config/raw";
391 Example config snippet for mirrors: $cfg_link
393 _add_non_http_urls($ctx, $txt);
394 _add_onion_note($txt);
396 my $code_url = prurl($ctx->{env}, $PublicInbox::WwwStream::CODE_URL);
# coderepos_raw() may return an empty list, in which case only the AGPL
# paragraph is appended
397 $$txt .= join("\n\n",
398 coderepos_raw($ctx, $top_url), # may be empty
399 "AGPL code for this site:\n git clone $code_url");
403 sub _default_text ($$$) {
404 my ($ctx, $key, $txt) = @_;
405 if ($key eq 'mirror') {
406 return _mirror_help($ctx, $txt);
407 } elsif ($key eq 'color') {
408 return _colors_help($ctx, $txt);
409 } elsif ($key eq 'config') {
410 return $ctx->{ibx}->can('cloneurl') ?
411 inbox_config($ctx, $txt) :
412 extindex_config($ctx, $txt);
414 return if $key ne 'help'; # TODO more keys?
416 my $ibx = $ctx->{ibx};
417 my $base_url = $ibx->base_url($ctx->{env});
419 public-inbox help for $base_url
424 public-inbox uses Message-ID identifiers in URLs.
425 One may look up messages by substituting Message-IDs
426 (without the leading '<' or trailing '>') into the URL.
427 Forward slash ('/') characters in the Message-IDs
428 need to be escaped as "%2F" (without quotes).
430 Thus, it is possible to retrieve any message by its
431 Message-ID by going to:
433 $base_url<Message-ID>/
434 (without the '<' or '>')
436 Message-IDs are described at:
442 # n.b. we use the Xapian DB for any regeneratable,
443 # order-of-arrival-independent data.
449 This public-inbox has search functionality provided by Xapian.
451 It supports typical AND, OR, NOT, '+', '-' queries present
452 in other search engines.
454 We also support search prefixes to limit the scope of the
455 search to certain fields.
457 Prefixes supported in this installation include:
460 _srch_prefix($ibx, $txt);
463 Most prefixes are probabilistic, meaning they support stemming
464 and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes
465 do not support stemming or wildcards.
466 The upstream Xapian query parser documentation fully explains
478 Message threading is enabled for this public-inbox,
479 additional endpoints for message threads are available:
481 * $base_url<Message-ID>/T/#u
483 Loads the thread belonging to the given <Message-ID>
484 in flat chronological order. The "#u" anchor
485 focuses the browser on the given <Message-ID>.
487 * $base_url<Message-ID>/t/#u
489 Loads the thread belonging to the given <Message-ID>
490 in threaded order with nesting. For deep threads,
491 this requires a wide display or horizontal scrolling.
493 Both of these HTML endpoints are suitable for offline reading
494 using the thread overview at the bottom of each page.
496 The gzipped mbox for a thread is available for downloading and
497 importing into your favorite mail client:
499 * $base_url<Message-ID>/t.mbox.gz
501 We use the mboxrd variant of the mbox format described at:
505 Users of feed readers may follow a particular thread using:
507 * $base_url<Message-ID>/t.atom
509 Which loads the thread in Atom Syndication Standard
510 described at Wikipedia and RFC4287:
512 $WIKI_URL/Atom_(standard)
513 https://tools.ietf.org/html/rfc4287
515 Atom Threading Extensions (RFC4685) are supported:
517 https://tools.ietf.org/html/rfc4685
522 _add_non_http_urls($ctx, \(my $note = ''));
523 $note and $note =~ s/^/ /gms and $$txt .= <<EOF;
532 This help text is maintained by public-inbox developers
533 reachable via plain-text email at: meta\@public-inbox.org
534 Their inbox is archived at: https://public-inbox.org/meta/
536 # TODO: support admin contact info in ~/.public-inbox/config