# used for displaying help texts and other non-mail content
package PublicInbox::WwwText;
use strict;
use v5.10.1;
use PublicInbox::Linkify;
use PublicInbox::WwwStream;
use PublicInbox::Hval qw(ascii_html prurl);
use HTTP::Date qw(time2str);
use URI::Escape qw(uri_escape_utf8);
use PublicInbox::GzipFilter qw(gzf_maybe);
our $QP_URL = 'https://xapian.org/docs/queryparser.html';
our $WIKI_URL = 'https://en.wikipedia.org/wiki';
my $hl = eval {
require PublicInbox::HlMod;
PublicInbox::HlMod->new
};
# /$INBOX/_/text/$KEY/ # KEY may contain slashes
# For now, "help" is the only supported $KEY
sub get_text {
my ($ctx, $key) = @_;
my $code = 200;
$key //= 'help'; # this 302s to _/text/help/
# get the raw text the same way we get mboxrds
my $raw = ($key =~ s!/raw\z!!);
my $have_tslash = ($key =~ s!/\z!!) if !$raw;
my $txt = '';
if (!_default_text($ctx, $key, \$txt)) {
$code = 404;
$txt = "404 Not Found ($key)\n";
}
my $env = $ctx->{env};
if ($raw) {
my $h = delete $ctx->{-res_hdr};
$txt = gzf_maybe($h, $env)->zflush($txt) if $code == 200;
push @$h, 'Content-Type', 'text/plain',
'Content-Length', length($txt);
return [ $code, $h, [ $txt ] ]
}
# enforce trailing slash for "wget -r" compatibility
if (!$have_tslash && $code == 200) {
my $url = $ctx->{ibx}->base_url($env);
$url .= "_/text/$key/";
return [ 302, [ 'Content-Type', 'text/plain',
'Location', $url ],
[ "Redirecting to $url\n" ] ];
}
# Follow git commit message conventions,
# first line is the Subject/title
my ($title) = ($txt =~ /\A([^\n]*)/s);
$ctx->{-title_html} = ascii_html($title);
my $nslash = ($key =~ tr!/!/!);
$ctx->{-upfx} = '../../../' . ('../' x $nslash);
my $l = PublicInbox::Linkify->new;
$l->linkify_1($txt);
if ($hl) {
$hl->do_hl_text(\$txt);
} else {
$txt = ascii_html($txt);
}
$txt = '' . $l->linkify_2($txt) . '
';
$txt =~ s!^search$!search!sm;
$txt =~ s!\bPOP3\b!POP3!;
$txt =~ s!\b(Newsgroups?)\b!$1!;
$txt =~ s!\bIMAP\b!IMAP!;
PublicInbox::WwwStream::html_oneshot($ctx, $code, $txt);
}
sub _srch_prefix ($$) {
my ($ibx, $txt) = @_;
my $pad = 0;
my $htxt = '';
my $help = $ibx->isrch->help;
my $i;
for ($i = 0; $i < @$help; $i += 2) {
my $pfx = $help->[$i];
my $n = length($pfx);
$pad = $n if $n > $pad;
$htxt .= $pfx . "\0";
$htxt .= $help->[$i + 1];
$htxt .= "\f\n";
}
$pad += 2;
my $padding = ' ' x ($pad + 4);
$htxt =~ s/^/$padding/gms;
$htxt =~ s/^$padding(\S+)\0/" $1".(' ' x ($pad - length($1)))/egms;
$htxt =~ s/\f\n/\n/gs;
$$txt .= $htxt;
1;
}
sub _colors_help ($$) {
my ($ctx, $txt) = @_;
my $ibx = $ctx->{ibx};
my $env = $ctx->{env};
my $base_url = $ibx->base_url($env);
$$txt .= "color customization for $base_url\n";
$$txt .= <{ibx}->{coderepo} // return;
# note: this doesn't preserve cgitrc layout, since we parse cgitrc
# and drop the original structure
$$txt .= "\tcoderepo = $_\n" for @$cr;
$$txt .= <<'EOF';
; `coderepo' entries allows blob reconstruction via patch emails if
; the inbox is indexed with Xapian. `@@ @@'
; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/,
; an HTTP endpoint which reconstructs git blobs via git-apply(1).
EOF
my $pi_cfg = $ctx->{www}->{pi_cfg};
for my $cr_name (@$cr) {
my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl");
my $path = "/path/to/$cr_name";
$cr_name = dq_escape($cr_name);
$$txt .= qq([coderepo "$cr_name"]\n);
if ($urls && scalar(@$urls)) {
$$txt .= "\t; ";
$$txt .= join(" ||\n\t;\t", map {;
my $dst = $path;
if ($path !~ m![a-z0-9_/\.\-]!i) {
$dst = '"'.dq_escape($dst).'"';
}
qq(git clone $_ $dst);
} @$urls);
$$txt .= "\n";
}
$$txt .= "\tdir = $path\n";
$$txt .= "\tcgiturl = https://example.com/";
$$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n";
}
}
# n.b. this is a perfect candidate for memoization
sub inbox_config ($$) {
my ($ctx, $txt) = @_;
my $ibx = $ctx->{ibx};
push @{$ctx->{-res_hdr}},
'Content-Disposition', 'inline; filename=inbox.config';
my $t = eval { $ibx->mm->created_at };
push(@{$ctx->{-res_hdr}}, 'Last-Modified', time2str($t)) if $t;
my $name = dq_escape($ibx->{name});
my $inboxdir = '/path/to/top-level-inbox';
my $base_url = $ibx->base_url($ctx->{env});
$$txt .= <{$k}) or next;
$$txt .= "\t$k = $_\n" for @$v;
}
if (my $altid = $ibx->{altid}) {
my $altid_map = $ibx->altid_map;
$$txt .= <{$k}) or next;
$$txt .= "\t$k = $v\n";
}
$$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)});
$$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)});
_coderepo_config($ctx, $txt);
1;
}
# n.b. this is a perfect candidate for memoization
sub extindex_config ($$) {
my ($ctx, $txt) = @_;
my $ibx = $ctx->{ibx};
push @{$ctx->{-res_hdr}},
'Content-Disposition', 'inline; filename=extindex.config';
my $name = dq_escape($ibx->{name});
my $base_url = $ibx->base_url($ctx->{env});
$$txt .= <{$k}) or next;
$$txt .= "\t$k = $v\n";
}
_coderepo_config($ctx, $txt);
1;
}
sub coderepos_raw ($$) {
my ($ctx, $top_url) = @_;
my $cr = $ctx->{ibx}->{coderepo} // return ();
my $cfg = $ctx->{www}->{pi_cfg};
my @ret = ('Code repositories for project(s) associated with this '.
$ctx->{ibx}->thing_type . "\n");
for my $cr_name (@$cr) {
my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl");
if ($urls) {
for (@$urls) {
my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ :
$top_url.$_;
$ret[0] .= "\n\t" . prurl($ctx->{env}, $u);
}
} else {
$ret[0] .= qq[\n\t$cr_name.git (no URL configured)];
}
}
@ret; # may be empty, this sub is called as an arg for join()
}
sub _add_non_http_urls ($$) {
my ($ctx, $txt) = @_;
$ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP
my $urls = $ctx->{ibx}->imap_url($ctx);
if (@$urls) {
$urls = join("\n ", @$urls);
$urls =~ s!://([^/@]+)/!://;AUTH=ANONYMOUS\@$1/!sg;
$$txt .= <{ibx}->nntp_url($ctx);
if (@$urls) {
$$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are";
$$txt .= ' available over NNTP:';
$$txt .= "\n " . join("\n ", @$urls) . "\n";
}
$urls = $ctx->{ibx}->pop3_url($ctx);
if (@$urls) {
$urls = join("\n ", @$urls);
$$txt .= <{ibx}->{newsgroup}
where \$(uuidgen) in the output of the `uuidgen' command on your system.
The UUID in the username functions as a private cookie (don't share it).
Idle accounts will expire periodically.
EOM
}
}
sub _add_onion_note ($) {
my ($txt) = @_;
$$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <{ibx};
my $base_url = $ibx->base_url($ctx->{env});
chop $base_url; # no trailing slash for "git clone"
my $dir = (split(m!/!, $base_url))[-1];
my %seen = ($base_url => 1);
my $top_url = $base_url;
$top_url =~ s!/[^/]+\z!/!;
$$txt .= "public-inbox mirroring instructions\n\n";
if ($ibx->can('cloneurl')) { # PublicInbox::Inbox
$$txt .=
"This public inbox may be cloned and mirrored by anyone:\n";
my @urls;
my $max = $ibx->max_git_epoch;
# TODO: some of these URLs may be too long and we may need to
# do something like code_footer() above, but these are local
# admin-defined
if (defined($max)) { # v2
for my $i (0..$max) {
# old epochs my be deleted:
-d "$ibx->{inboxdir}/git/$i.git" or next;
my $url = "$base_url/$i";
$seen{$url} = 1;
push @urls, "$url $dir/git/$i.git";
}
my $nr = scalar(@urls);
if ($nr > 1) {
chomp($$txt .= <cloneurl}) {
next if $seen{$u}++;
push @urls, $u;
}
$$txt .= "\n";
$$txt .= join('', map { " git clone --mirror $_\n" } @urls);
my $addrs = $ibx->{address} // 'inbox@example.com';
my $ng = $ibx->{newsgroup} // '';
substr($ng, 0, 0, ' --ng ') if $ng;
$addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY';
my $v = defined $max ? '-V2' : '-V1';
$$txt .= <{name} ./$dir $base_url \\
$addrs
public-inbox-index ./$dir
EOF
} else { # PublicInbox::ExtSearch
$$txt .= <{www}->{pi_cfg}->{lc('publicInbox.wwwListing')};
if (($v // '') =~ /\A(?:all|match=domain)\z/) {
$$txt .= <{env}, $PublicInbox::WwwStream::CODE_URL);
$$txt .= join("\n\n",
coderepos_raw($ctx, $top_url), # may be empty
"AGPL code for this site:\n git clone $code_url");
1;
}
sub _default_text ($$$) {
my ($ctx, $key, $txt) = @_;
if ($key eq 'mirror') {
return _mirror_help($ctx, $txt);
} elsif ($key eq 'color') {
return _colors_help($ctx, $txt);
} elsif ($key eq 'config') {
return $ctx->{ibx}->can('cloneurl') ?
inbox_config($ctx, $txt) :
extindex_config($ctx, $txt);
}
return if $key ne 'help'; # TODO more keys?
my $ibx = $ctx->{ibx};
my $base_url = $ibx->base_url($ctx->{env});
$$txt .= <') into the URL.
Forward slash ('/') characters in the Message-IDs
need to be escaped as "%2F" (without quotes).
Thus, it is possible to retrieve any message by its
Message-ID by going to:
$base_url/
(without the '<' or '>')
Message-IDs are described at:
$WIKI_URL/Message-ID
EOF
# n.b. we use the Xapian DB for any regeneratable,
# order-of-arrival-independent data.
if ($ibx->isrch) {
$$txt .= <over) {
$$txt .= </T/#u
Loads the thread belonging to the given
in flat chronological order. The "#u" anchor
focuses the browser on the given .
* $base_url/t/#u
Loads the thread belonging to the given
in threaded order with nesting. For deep threads,
this requires a wide display or horizontal scrolling.
Both of these HTML endpoints are suitable for offline reading
using the thread overview at the bottom of each page.
The gzipped mbox for a thread is available for downloading and
importing into your favorite mail client:
* $base_url/t.mbox.gz
We use the mboxrd variant of the mbox format described at:
$WIKI_URL/Mbox
Users of feed readers may follow a particular thread using:
* $base_url/t.atom
Which loads the thread in Atom Syndication Standard
described at Wikipedia and RFC4287:
$WIKI_URL/Atom_(standard)
https://tools.ietf.org/html/rfc4287
Atom Threading Extensions (RFC4685) are supported:
https://tools.ietf.org/html/rfc4685
EOF
} # $over
_add_non_http_urls($ctx, \(my $note = ''));
$note and $note =~ s/^/ /gms and $$txt .= <