1 # Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # used for displaying help texts and other non-mail content
5 package PublicInbox::WwwText;
8 use PublicInbox::Linkify;
9 use PublicInbox::WwwStream;
10 use PublicInbox::Hval qw(ascii_html);
11 use URI::Escape qw(uri_escape_utf8);
12 use PublicInbox::GzipFilter qw(gzf_maybe);
# URL of the Xapian query parser documentation
13 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
# base URL for Wikipedia articles; the threading help text appends
# an article name to it ("$WIKI_URL/Atom_(standard)")
14 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
# NOTE(review): the statements surrounding this require/new pair are
# not visible here; an `$hl' object is used later ($hl->do_hl_text),
# presumably the result of this constructor call -- confirm
16 require PublicInbox::HlMod;
17 PublicInbox::HlMod->new
20 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
21 # Supported $KEY values: "help", "color" and "config"
# Resolves $key to a text via _default_text() and answers with one of:
# a [ $code, $hdr, [ $txt ] ] text/plain response, a 302 redirect to
# the trailing-slash URL, or an HTML page via html_oneshot().
# NOTE(review): the condition guarding the text/plain return is not
# visible here; presumably it is taken when "/raw" was requested
26 $key = 'help' if !defined $key; # this 302s to _/text/help/
28 # get the raw text the same way we get mboxrds
29 my $raw = ($key =~ s!/raw\z!!);
# NOTE(review): perlsyn documents `my $x = ... if COND' as undefined
# behavior; declaring $have_tslash first and assigning conditionally
# would avoid relying on it
30 my $have_tslash = ($key =~ s!/\z!!) if !$raw;
# index 3 (the Content-Length value) is filled in below once the
# final body is known
33 my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
# a false return from _default_text means the key is unknown
34 if (!_default_text($ctx, $key, $hdr, \$txt)) {
36 $txt = "404 Not Found ($key)\n";
38 my $env = $ctx->{env};
41 my $gzf = gzf_maybe($hdr, $env);
42 $txt = $gzf->translate($txt);
45 $hdr->[3] = length($txt);
46 return [ $code, $hdr, [ $txt ] ]
49 # enforce trailing slash for "wget -r" compatibility
50 if (!$have_tslash && $code == 200) {
51 my $url = $ctx->{ibx}->base_url($env);
52 $url .= "_/text/$key/";
54 return [ 302, [ 'Content-Type', 'text/plain',
56 [ "Redirecting to $url\n" ] ];
59 # Follow git commit message conventions,
60 # first line is the Subject/title
61 my ($title) = ($txt =~ /\A([^\n]*)/s);
62 $ctx->{-title_html} = ascii_html($title);
# tr!/!/! only counts the slashes in $key (it maps "/" to itself);
# each one adds another "../" to the relative prefix
63 my $nslash = ($key =~ tr!/!/!);
64 $ctx->{-upfx} = '../../../' . ('../' x $nslash);
65 my $l = PublicInbox::Linkify->new;
68 $hl->do_hl_text(\$txt);
70 $txt = ascii_html($txt);
# the linkify_2() result is wrapped in <pre> and handed to
# html_oneshot as a scalar ref
72 $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
73 PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt);
# Formats the search prefix help obtained from $srch->help.
#   $srch - object providing ->help, walked below as a flat list of
#           (prefix, description) pairs
#   $txt  - second argument; NOTE(review): presumably a scalar ref the
#           formatted text is appended to -- the lines using it are
#           not visible here, confirm
76 sub _srch_prefix ($$) {
77 my ($srch, $txt) = @_;
80 my $help = $srch->help;
# step by two: even indices are prefixes, odd indices descriptions
82 for ($i = 0; $i < @$help; $i += 2) {
83 my $pfx = $help->[$i];
# $pad tracks the largest $n seen across all entries
85 $pad = $n if $n > $pad;
87 $htxt .= $help->[$i + 1];
# indent every line of $htxt by $pad + 8 spaces
91 my $padding = ' ' x ($pad + 8);
92 $htxt =~ s/^/$padding/gms;
# an indented line starting with a non-space token followed by NUL
# is rewritten to begin with that token, followed by
# ($pad - length of the token) spaces
93 $htxt =~ s/^$padding(\S+)\0/" $1".
94 (' ' x ($pad - length($1)))/egms;
# drop a form feed that directly precedes a newline
95 $htxt =~ s/\f\n/\n/gs;
# Appends the color customization help to the scalar referenced by
# $txt: a title line naming the inbox base URL, prose pointing users
# at ${base_url}userContent.css, and finally the output of
# PublicInbox::UserContent::sample() followed by a closing "```" line.
#   $ctx - request context; {ibx} and {env} are read from it
#   $txt - scalar ref which is appended to
100 sub _colors_help ($$) {
101 my ($ctx, $txt) = @_;
102 my $ibx = $ctx->{ibx};
103 my $env = $ctx->{env};
104 my $base_url = $ibx->base_url($env);
# the first line doubles as the title of the rendered text
105 $$txt .= "color customization for $base_url\n";
108 public-inbox provides a stable set of CSS classes for users to
109 customize colors for highlighting diffs and code.
111 Users of browsers such as dillo, Firefox, or some browser
112 extensions may start by downloading the following sample CSS file
113 to control the colors they see:
115 ${base_url}userContent.css
121 $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
124 # git-config section names are quoted in the config file, so escape them
# every backslash in $name is doubled
# NOTE(review): only this substitution is visible here; whether other
# characters (e.g. double quotes) are escaped as well needs confirming
127 $name =~ s/\\/\\\\/g;
# Appends "coderepo" related config lines to the scalar referenced by
# $txt; returns early (bare return) when $ctx->{ibx}->{coderepo} is
# undefined.  One "coderepo = NAME" line is written per entry, then a
# [coderepo "NAME"] section per entry with placeholder dir and cgiturl
# values; configured cgiturl values are turned into "git clone" hints.
#   $ctx - request context; {ibx}->{coderepo} and {www}->{pi_cfg} are read
#   $txt - scalar ref which is appended to
# NOTE(review): `$cr_name = dq_escape($cr_name)' assigns to the foreach
# loop variable, which aliases the elements of @$cr, so the array stored
# in $ctx->{ibx}->{coderepo} is modified in place -- confirm whether a
# copy should be escaped instead
# NOTE(review): `$path !~ m![a-z0-9_/\.\-]!i' is only true when $path
# contains none of those characters, and $path always starts with
# "/path/to/"; a negated character class with `=~' may have been
# intended -- confirm
# NOTE(review): the emitted text says "entries allows"; "entries allow"
# reads as the intended wording
132 sub _coderepo_config ($$) {
133 my ($ctx, $txt) = @_;
134 my $cr = $ctx->{ibx}->{coderepo} // return;
135 # note: this doesn't preserve cgitrc layout, since we parse cgitrc
136 # and drop the original structure
137 $$txt .= "\tcoderepo = $_\n" for @$cr;
140 ; `coderepo' entries allows blob reconstruction via patch emails if
141 ; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@'
142 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
143 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
145 my $pi_cfg = $ctx->{www}->{pi_cfg};
146 for my $cr_name (@$cr) {
# the config lookup and the placeholder path use the unescaped name
147 my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
148 my $path = "/path/to/$cr_name";
149 $cr_name = dq_escape($cr_name);
151 $$txt .= qq([coderepo "$cr_name"]\n);
152 if ($urls && scalar(@$urls)) {
154 $$txt .= join(" ||\n\t;\t", map {;
156 if ($path !~ m![a-z0-9_/\.\-]!i) {
157 $dst = '"'.dq_escape($dst).'"';
159 qq(git clone $_ $dst);
163 $$txt .= "\tdir = $path\n";
164 $$txt .= "\tcgiturl = https://example.com/";
165 $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
169 # n.b. this is a perfect candidate for memoization
# Appends an example [publicinbox "NAME"] config snippet for mirroring
# this inbox to the scalar referenced by $txt, and pushes a
# Content-Disposition header (filename=inbox.config) onto @$hdr.
#   $ctx - request context; {ibx} and {env} are read from it
#   $hdr - array ref of response headers, appended to
#   $txt - scalar ref which is appended to
# The snippet uses placeholder locations (/path/to/top-level-inbox,
# example.com, example.onion) and ends with the lines written by
# _coderepo_config().
170 sub inbox_config ($$$) {
171 my ($ctx, $hdr, $txt) = @_;
172 my $ibx = $ctx->{ibx};
173 push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
174 my $name = dq_escape($ibx->{name});
175 my $inboxdir = '/path/to/top-level-inbox';
176 my $base_url = $ibx->base_url($ctx->{env});
178 ; Example public-inbox config snippet for a mirror of
180 ; See public-inbox-config(5) manpage for more details:
181 ; https://public-inbox.org/public-inbox-config.html
182 [publicinbox "$name"]
184 ; note: public-inbox before v1.2.0 used `mainrepo' instead of
185 ; `inboxdir', both remain supported after 1.2
187 url = https://example.com/$name/
188 url = http://example.onion/$name/
190 for my $k (qw(address listid infourl watchheader)) {
191 defined(my $v = $ibx->{$k}) or next;
# these fields are dereferenced as arrays: one line per value
192 $$txt .= "\t$k = $_\n" for @$v;
194 if (my $altid = $ibx->{altid}) {
195 my $altid_map = $ibx->altid_map;
197 ; altid DBs may be used to provide numeric article ID lookup from
198 ; old, pre-existing sources. You can recreate them via curl(1),
199 ; gzip(1), and sqlite3(1) as documented:
201 for (sort keys %$altid_map) {
202 $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
203 "\t;\tgzip -dc | \\\n" .
204 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
205 $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
209 for my $k (qw(filter newsgroup obfuscate replyto)) {
210 defined(my $v = $ibx->{$k}) or next;
# these fields are interpolated directly as single values
211 $$txt .= "\t$k = $v\n";
213 $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url});
214 _coderepo_config($ctx, $txt);
218 # n.b. this is a perfect candidate for memoization
# Appends an example config snippet for the external index (extindex)
# to the scalar referenced by $txt, and pushes a Content-Disposition
# header (filename=extindex.config) onto @$hdr.
#   $ctx - request context; {ibx} and {env} are read from it
#   $hdr - array ref of response headers, appended to
#   $txt - scalar ref which is appended to
# NOTE(review): the emitted text contains "public-inbox-config(5)manpage"
# without a space before "manpage", unlike the inbox_config text
# NOTE(review): `infourl' is interpolated as a plain scalar here, while
# inbox_config dereferences the same key as an array ref (@$v); confirm
# which shape $ibx->{infourl} has for an extindex
219 sub extindex_config ($$$) {
220 my ($ctx, $hdr, $txt) = @_;
221 my $ibx = $ctx->{ibx};
222 push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
223 my $name = dq_escape($ibx->{name});
224 my $base_url = $ibx->base_url($ctx->{env});
226 ; Example public-inbox config snippet for the external index (extindex) at:
228 ; See public-inbox-config(5)manpage for more details:
229 ; https://public-inbox.org/public-inbox-config.html
231 topdir = /path/to/extindex-topdir
232 url = https://example.com/$name/
233 url = http://example.onion/$name/
235 for my $k (qw(infourl)) {
236 defined(my $v = $ibx->{$k}) or next;
237 $$txt .= "\t$k = $v\n";
239 _coderepo_config($ctx, $txt);
# Builds the text for a given $key into the scalar referenced by $txt.
#   $ctx - request context; {ibx} and {env} are read from it
#   $key - 'color' delegates to _colors_help(), 'config' to
#          inbox_config() or extindex_config(), 'help' builds the help
#          text below; any other key hits a bare return, which the
#          caller's `if (!_default_text(...))' check turns into a 404
#   $hdr - array ref of response headers, passed on to the config subs
#   $txt - scalar ref which is appended to
# NOTE(review): the return statement of the 'help' path is not visible
# here; the caller's check implies it must be a true value -- confirm
243 sub _default_text ($$$$) {
244 my ($ctx, $key, $hdr, $txt) = @_;
245 return _colors_help($ctx, $txt) if $key eq 'color';
# an {ibx} object which can('cloneurl') gets the inbox config,
# anything else gets the extindex config
246 $key eq 'config' and return $ctx->{ibx}->can('cloneurl') ?
247 inbox_config($ctx, $hdr, $txt) :
248 extindex_config($ctx, $hdr, $txt);
249 return if $key ne 'help'; # TODO more keys?
251 my $ibx = $ctx->{ibx};
252 my $base_url = $ibx->base_url($ctx->{env});
253 $$txt .= "public-inbox help for $base_url\n";
259 public-inbox uses Message-ID identifiers in URLs.
260 One may look up messages by substituting Message-IDs
261 (without the leading '<' or trailing '>') into the URL.
262 Forward slash ('/') characters in the Message-IDs
263 need to be escaped as "%2F" (without quotes).
265 Thus, it is possible to retrieve any message by its
266 Message-ID by going to:
268 $base_url<Message-ID>/
270 (without the '<' or '>')
272 Message-IDs are described at:
278 # n.b. we use the Xapian DB for any regeneratable,
279 # order-of-arrival-independent data.
280 my $srch = $ibx->isrch;
286 This public-inbox has search functionality provided by Xapian.
288 It supports typical AND, OR, NOT, '+', '-' queries present
289 in other search engines.
291 We also support search prefixes to limit the scope of the
292 search to certain fields.
294 Prefixes supported in this installation include:
297 _srch_prefix($srch, $txt);
301 Most prefixes are probabilistic, meaning they support stemming
302 and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes
303 do not support stemming or wildcards.
304 The upstream Xapian query parser documentation fully explains
311 my $over = $ibx->over;
317 Message threading is enabled for this public-inbox,
318 additional endpoints for message threads are available:
320 * $base_url<Message-ID>/T/#u
322 Loads the thread belonging to the given <Message-ID>
323 in flat chronological order. The "#u" anchor
324 focuses the browser on the given <Message-ID>.
326 * $base_url<Message-ID>/t/#u
328 Loads the thread belonging to the given <Message-ID>
329 in threaded order with nesting. For deep threads,
330 this requires a wide display or horizontal scrolling.
332 Both of these HTML endpoints are suitable for offline reading
333 using the thread overview at the bottom of each page.
335 Users of feed readers may follow a particular thread using:
337 * $base_url<Message-ID>/t.atom
339 Which loads the thread in Atom Syndication Standard
340 described at Wikipedia and RFC4287:
342 $WIKI_URL/Atom_(standard)
343 https://tools.ietf.org/html/rfc4287
345 Atom Threading Extensions (RFC4685) is supported:
347 https://tools.ietf.org/html/rfc4685
349 Finally, the gzipped mbox for a thread is available for
350 downloading and importing into your favorite mail client:
352 * $base_url<Message-ID>/t.mbox.gz
354 We use the mboxrd variant of the mbox format described
366 This help text is maintained by public-inbox developers
367 reachable via plain-text email at: meta\@public-inbox.org
368 Their inbox is archived at: https://public-inbox.org/meta/
371 # TODO: support admin contact info in ~/.public-inbox/config