1 # Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # used for displaying help texts and other non-mail content
5 package PublicInbox::WwwText;
8 use PublicInbox::Linkify;
9 use PublicInbox::WwwStream;
10 use PublicInbox::Hval qw(ascii_html);
11 use URI::Escape qw(uri_escape_utf8);
12 use PublicInbox::GzipFilter qw(gzf_maybe);
# URL of the Xapian query parser documentation.
13 our $QP_URL = 'https://xapian.org/docs/queryparser.html';
# Base URL for Wikipedia links; the help text interpolates it, e.g.
# "$WIKI_URL/Atom_(standard)".
14 our $WIKI_URL = 'https://en.wikipedia.org/wiki';
# Load PublicInbox::HlMod and construct an instance.
# NOTE(review): the statement that stores or guards this value is not visible
# in this excerpt; presumably it becomes the $hl used for do_hl_text() --
# confirm.
16 require PublicInbox::HlMod;
17 PublicInbox::HlMod->new
# Body of the handler for the text endpoint; the enclosing "sub" line and a
# few statements are not part of this excerpt.
20 # /$INBOX/_/text/$KEY/ # KEY may contain slashes
21 # _default_text() currently accepts "help", "color" and "config" as $KEY
26 $key = 'help' if !defined $key; # this 302s to _/text/help/
28 # get the raw text the same way we get mboxrds
# Each substitution strips its suffix from $key and is true when the suffix
# was present.
29 my $raw = ($key =~ s!/raw\z!!);
# NOTE(review): perlsyn documents "my ... if COND" as undefined behavior;
# consider declaring $have_tslash first and assigning it conditionally.
30 my $have_tslash = ($key =~ s!/\z!!) if !$raw;
# Slot 3 (the Content-Length value) is filled in once the body is final.
33 my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ];
# _default_text() appends to $txt through the reference and returns false
# for keys it does not know.
34 if (!_default_text($ctx, $key, $hdr, \$txt)) {
36 $txt = "404 Not Found ($key)\n";
38 my $env = $ctx->{env};
# NOTE(review): presumably gzf_maybe() decides from the request env whether
# to gzip -- confirm that $gzf is always defined before translate() is called.
41 my $gzf = gzf_maybe($hdr, $env);
42 $txt = $gzf->translate($txt);
# Content-Length is the length of the final body string.
45 $hdr->[3] = length($txt);
# The response is a [ status, headers, [ body ] ] array.
46 return [ $code, $hdr, [ $txt ] ]
49 # enforce trailing slash for "wget -r" compatibility
50 if (!$have_tslash && $code == 200) {
51 my $url = $ctx->{ibx}->base_url($env);
52 $url .= "_/text/$key/";
54 return [ 302, [ 'Content-Type', 'text/plain',
56 [ "Redirecting to $url\n" ] ];
59 # Follow git commit message conventions,
60 # first line is the Subject/title
61 my ($title) = ($txt =~ /\A([^\n]*)/s);
62 $ctx->{-title_html} = ascii_html($title);
# tr counts the slashes in $key; -upfx climbs three levels plus one more
# for every slash counted.
63 my $nslash = ($key =~ tr!/!/!);
64 $ctx->{-upfx} = '../../../' . ('../' x $nslash);
65 my $l = PublicInbox::Linkify->new;
68 $hl->do_hl_text(\$txt);
70 $txt = ascii_html($txt);
# Wrap the linkified text in <pre> and pass it to html_oneshot() by reference.
72 $txt = '<pre>' . $l->linkify_2($txt) . '</pre>';
73 PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt);
# Formats the search prefix help obtained from $srch->help into aligned text.
#   $srch - search object providing ->help (an array ref walked in pairs)
#   $txt  - second argument, unpacked from @_
# NOTE(review): several statements of this sub are missing from this excerpt,
# including the declarations of $i, $n, $pad and $htxt and whatever is done
# with the finished $htxt -- confirm against the full file.
76 sub _srch_prefix ($$) {
77 my ($srch, $txt) = @_;
80 my $help = $srch->help;
# Even slots hold a prefix, odd slots hold its help text.
82 for ($i = 0; $i < @$help; $i += 2) {
83 my $pfx = $help->[$i];
# $pad ends up as the largest $n seen across all entries.
85 $pad = $n if $n > $pad;
87 $htxt .= $help->[$i + 1];
# Indent every line by $pad + 8 spaces, then rewrite each line that begins
# with "<non-space run>\0" so that run moves to the left and is followed by
# $pad - length spaces.
91 my $padding = ' ' x ($pad + 8);
92 $htxt =~ s/^/$padding/gms;
93 $htxt =~ s/^$padding(\S+)\0/" $1".
94 (' ' x ($pad - length($1)))/egms;
# Drop the form feed from every "\f\n" pair.
95 $htxt =~ s/\f\n/\n/gs;
# Appends the color customization help to $$txt.
#   $ctx - request context; {ibx} and {env} are read
#   $txt - reference to the output string (appended to)
# After the prose, the output of PublicInbox::UserContent::sample() and a
# closing "```" line are appended.
# NOTE(review): the heredoc start/end markers and the return statement are
# missing from this excerpt.
100 sub _colors_help ($$) {
101 my ($ctx, $txt) = @_;
102 my $ibx = $ctx->{ibx};
103 my $env = $ctx->{env};
# The base URL is interpolated into the heading and the userContent.css link.
104 my $base_url = $ibx->base_url($env);
105 $$txt .= "color customization for $base_url\n";
108 public-inbox provides a stable set of CSS classes for users to
109 customize colors for highlighting diffs and code.
111 Users of browsers such as dillo, Firefox, or some browser
112 extensions may start by downloading the following sample CSS file
113 to control the colors they see:
115 ${base_url}userContent.css
121 $$txt .= PublicInbox::UserContent::sample($ibx, $env) . "```\n";
124 # git-config section names are quoted in the config file, so escape them
# NOTE(review): the enclosing sub line is missing from this excerpt;
# presumably this is the body of dq_escape(), which other subs in this file
# call on names -- confirm.
# Doubles every backslash in $name.
127 $name =~ s/\\/\\\\/g;
# Character set handed to uri_escape_utf8() as its second argument.  The
# leading "^" negates the class, so ASCII letters, digits and "-._~/" are
# left as-is and every other character is percent-escaped.
132 sub URI_PATH () { '^A-Za-z0-9\-\._~/' }
134 # n.b. this is a perfect candidate for memoization
# Appends an example git-config style snippet describing this inbox to $$txt
# and adds a Content-Disposition header to @$hdr.
#   $ctx - request context; {ibx}, {env} and {www}->{pi_cfg} are read
#   $hdr - array ref of response headers (modified in place)
#   $txt - reference to the output string (appended to)
# NOTE(review): heredoc markers, closing braces and the return statement are
# missing from this excerpt.
135 sub inbox_config ($$$) {
136 my ($ctx, $hdr, $txt) = @_;
137 my $ibx = $ctx->{ibx};
138 push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config';
# Escaped because the name is placed inside a double-quoted section header.
139 my $name = dq_escape($ibx->{name});
# Placeholder path used in the generated snippet.
140 my $inboxdir = '/path/to/top-level-inbox';
141 my $base_url = $ibx->base_url($ctx->{env});
143 ; Example public-inbox config snippet for a mirror of
145 ; See public-inbox-config(5) manpage for more details:
146 ; https://public-inbox.org/public-inbox-config.html
147 [publicinbox "$name"]
149 ; note: public-inbox before v1.2.0 used `mainrepo' instead of
150 ; `inboxdir', both remain supported after 1.2
152 url = https://example.com/$name/
153 url = http://example.onion/$name/
155 for my $k (qw(address listid infourl watchheader)) {
# These keys are dereferenced as array refs; one "key = value" line is
# written per element.
156 defined(my $v = $ibx->{$k}) or next;
157 $$txt .= "\t$k = $_\n" for @$v;
159 if (my $altid = $ibx->{altid}) {
160 my $altid_map = $ibx->altid_map;
162 ; altid DBs may be used to provide numeric article ID lookup from
163 ; old, pre-existing sources. You can recreate them via curl(1),
164 ; gzip(1), and sqlite3(1) as documented:
166 for (sort keys %$altid_map) {
# For each altid name, in sorted order: commented-out download instructions
# followed by the matching "altid =" line.
167 $$txt .= "\t;\tcurl -d '' $base_url$_.sql.gz | \\\n" .
168 "\t;\tgzip -dc | \\\n" .
169 "\t;\tsqlite3 $inboxdir/$_.sqlite3\n";
170 $$txt .= "\taltid = serial:$_:file=$_.sqlite3\n";
174 for my $k (qw(filter newsgroup obfuscate replyto)) {
# These keys are written as single scalar values.
175 defined(my $v = $ibx->{$k}) or next;
176 $$txt .= "\t$k = $v\n";
178 $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url});
180 # note: this doesn't preserve cgitrc layout, since we parse cgitrc
181 # and drop the original structure
182 if (defined(my $cr = $ibx->{coderepo})) {
183 $$txt .= "\tcoderepo = $_\n" for @$cr;
186 ; `coderepo' entries allows blob reconstruction via patch emails if
187 ; the inbox is indexed with Xapian. `@@ <from-range> <to-range> @@'
188 ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
189 ; an HTTP endpoint which reconstructs git blobs via git-apply(1).
191 my $pi_cfg = $ctx->{www}->{pi_cfg};
# NOTE(review): $cr_name aliases the elements of @$cr, which is
# $ibx->{coderepo}; the assignment from dq_escape() below therefore rewrites
# the inbox's stored names in place.  The escaped value is also what gets
# URI-escaped for the example cgiturl.  Consider using a separate variable
# for the escaped name.
192 for my $cr_name (@$cr) {
193 my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
194 my $path = "/path/to/$cr_name";
195 $cr_name = dq_escape($cr_name);
197 $$txt .= qq([coderepo "$cr_name"]\n);
198 if ($urls && scalar(@$urls)) {
# One commented-out "git clone URL DEST" per configured cgiturl, joined
# with " ||".
200 $$txt .= join(" ||\n\t;\t", map {;
# NOTE(review): $path always starts with "/path/to/", so it always contains
# a character from this class and the "!~" test can never be true; the
# quoting branch looks unreachable.  Probably meant:
#   $path =~ m![^a-z0-9_/\.\-]!i
202 if ($path !~ m![a-z0-9_/\.\-]!i) {
203 $dst = '"'.dq_escape($dst).'"';
205 qq(git clone $_ $dst);
209 $$txt .= "\tdir = $path\n";
210 $$txt .= "\tcgiturl = https://example.com/";
211 $$txt .= uri_escape_utf8($cr_name, URI_PATH)."\n";
217 # n.b. this is a perfect candidate for memoization
# Appends an example config snippet for an external index (extindex) to
# $$txt and adds a Content-Disposition header to @$hdr.
#   $ctx - request context; {ibx} and {env} are read
#   $hdr - array ref of response headers (modified in place)
#   $txt - reference to the output string (appended to)
# NOTE(review): the emitted text reads "public-inbox-config(5)manpage" with
# no space before "manpage", unlike the snippet written by inbox_config().
# NOTE(review): heredoc markers, the section header line and the return
# statement are missing from this excerpt.
218 sub extindex_config ($$$) {
219 my ($ctx, $hdr, $txt) = @_;
220 my $ibx = $ctx->{ibx};
221 push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config';
# Escaped with dq_escape() and then interpolated into the example url lines.
222 my $name = dq_escape($ibx->{name});
223 my $base_url = $ibx->base_url($ctx->{env});
225 ; Example public-inbox config snippet for the external index (extindex) at:
227 ; See public-inbox-config(5)manpage for more details:
228 ; https://public-inbox.org/public-inbox-config.html
230 topdir = /path/to/extindex-topdir
231 url = https://example.com/$name/
232 url = http://example.onion/$name/
234 for my $k (qw(infourl)) {
# NOTE(review): infourl is written as a plain scalar here, while
# inbox_config() iterates the same key as an array ref -- confirm which
# shape the extindex object stores.
235 defined(my $v = $ibx->{$k}) or next;
236 $$txt .= "\t$k = $v\n";
238 # TODO: coderepo support for extindex
# Fills $$txt with the text for $key.
#   "color"  -> _colors_help()
#   "config" -> inbox_config() when the ibx object can('cloneurl'),
#               otherwise extindex_config()
#   "help"   -> the help text assembled in the rest of this sub
# Any other key returns nothing, which the caller turns into a
# "404 Not Found" body.
#   $ctx - request context; {ibx} and {env} are read
#   $key - text key with any "/raw" or trailing "/" already stripped
#   $hdr - array ref of response headers, passed through to the config subs
#   $txt - reference to the output string (appended to)
# NOTE(review): heredoc markers, conditionals guarding the search and
# threading sections, and the return statement are missing from this excerpt.
242 sub _default_text ($$$$) {
243 my ($ctx, $key, $hdr, $txt) = @_;
244 return _colors_help($ctx, $txt) if $key eq 'color';
# can('cloneurl') is the test used to choose between the inbox snippet and
# the extindex snippet.
245 $key eq 'config' and return $ctx->{ibx}->can('cloneurl') ?
246 inbox_config($ctx, $hdr, $txt) :
247 extindex_config($ctx, $hdr, $txt);
248 return if $key ne 'help'; # TODO more keys?
250 my $ibx = $ctx->{ibx};
251 my $base_url = $ibx->base_url($ctx->{env});
252 $$txt .= "public-inbox help for $base_url\n";
258 public-inbox uses Message-ID identifiers in URLs.
259 One may look up messages by substituting Message-IDs
260 (without the leading '<' or trailing '>') into the URL.
261 Forward slash ('/') characters in the Message-IDs
262 need to be escaped as "%2F" (without quotes).
264 Thus, it is possible to retrieve any message by its
265 Message-ID by going to:
267 $base_url<Message-ID>/
269 (without the '<' or '>')
271 Message-IDs are described at:
277 # n.b. we use the Xapian DB for any regeneratable,
278 # order-of-arrival-independent data.
# NOTE(review): presumably the search section is only emitted when $srch is
# set; the guarding condition is not visible in this excerpt.
279 my $srch = $ibx->isrch;
285 This public-inbox has search functionality provided by Xapian.
287 It supports typical AND, OR, NOT, '+', '-' queries present
288 in other search engines.
290 We also support search prefixes to limit the scope of the
291 search to certain fields.
293 Prefixes supported in this installation include:
296 _srch_prefix($srch, $txt);
300 Most prefixes are probabilistic, meaning they support stemming
301 and wildcards ('*'). Ranges (such as 'd:') and boolean prefixes
302 do not support stemming or wildcards.
303 The upstream Xapian query parser documentation fully explains
310 my $over = $ibx->over;
316 Message threading is enabled for this public-inbox,
317 additional endpoints for message threads are available:
319 * $base_url<Message-ID>/T/#u
321 Loads the thread belonging to the given <Message-ID>
322 in flat chronological order. The "#u" anchor
323 focuses the browser on the given <Message-ID>.
325 * $base_url<Message-ID>/t/#u
327 Loads the thread belonging to the given <Message-ID>
328 in threaded order with nesting. For deep threads,
329 this requires a wide display or horizontal scrolling.
331 Both of these HTML endpoints are suitable for offline reading
332 using the thread overview at the bottom of each page.
334 Users of feed readers may follow a particular thread using:
336 * $base_url<Message-ID>/t.atom
338 Which loads the thread in Atom Syndication Standard
339 described at Wikipedia and RFC4287:
341 $WIKI_URL/Atom_(standard)
342 https://tools.ietf.org/html/rfc4287
344 Atom Threading Extensions (RFC4685) is supported:
346 https://tools.ietf.org/html/rfc4685
348 Finally, the gzipped mbox for a thread is available for
349 downloading and importing into your favorite mail client:
351 * $base_url<Message-ID>/t.mbox.gz
353 We use the mboxrd variant of the mbox format described
365 This help text is maintained by public-inbox developers
366 reachable via plain-text email at: meta\@public-inbox.org
367 Their inbox is archived at: https://public-inbox.org/meta/
370 # TODO: support admin contact info in ~/.public-inbox/config