X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWwwText.pm;h=8b929f7431560979ddb076b321016241e854459a;hb=97510d7a92b4e44318d1917a54c70d536bbf46f2;hp=b5874cf6bad534a20daf060452884b463628440a;hpb=a9cab9c477b74b3b828c26aa64e70d88c9d6744c;p=public-inbox.git diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index b5874cf6..8b929f74 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -1,15 +1,22 @@ -# Copyright (C) 2016-2018 all contributors +# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # used for displaying help texts and other non-mail content package PublicInbox::WwwText; use strict; -use warnings; +use v5.10.1; use PublicInbox::Linkify; use PublicInbox::WwwStream; -use PublicInbox::Hval qw(ascii_html); +use PublicInbox::Hval qw(ascii_html prurl); +use HTTP::Date qw(time2str); +use URI::Escape qw(uri_escape_utf8); +use PublicInbox::GzipFilter qw(gzf_maybe); our $QP_URL = 'https://xapian.org/docs/queryparser.html'; our $WIKI_URL = 'https://en.wikipedia.org/wiki'; +my $hl = eval { + require PublicInbox::HlMod; + PublicInbox::HlMod->new +}; # /$INBOX/_/text/$KEY/ # KEY may contain slashes # For now, "help" is the only supported $KEY @@ -17,26 +24,32 @@ sub get_text { my ($ctx, $key) = @_; my $code = 200; - $key = 'help' if !defined $key; # this 302s to _/text/help/ + $key //= 'help'; # this 302s to _/text/help/ # get the raw text the same way we get mboxrds my $raw = ($key =~ s!/raw\z!!); my $have_tslash = ($key =~ s!/\z!!) if !$raw; my $txt = ''; - if (!_default_text($ctx, $key, \$txt)) { + my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ]; + if (!_default_text($ctx, $key, $hdr, \$txt)) { $code = 404; $txt = "404 Not Found ($key)\n"; } + my $env = $ctx->{env}; if ($raw) { - return [ $code, [ 'Content-Type', 'text/plain', - 'Content-Length', bytes::length($txt) ], - [ $txt ] ] + if ($code == 200) { + my $gzf = gzf_maybe($hdr, $env); + $txt = $gzf->translate($txt); + $txt .= $gzf->zflush; + } + $hdr->[3] = length($txt); + return [ $code, $hdr, [ $txt ] ] } # enforce trailing slash for "wget -r" compatibility if (!$have_tslash && $code == 200) { - my $url = $ctx->{-inbox}->base_url($ctx->{env}); + my $url = $ctx->{ibx}->base_url($env); $url .= "_/text/$key/"; return [ 302, [ 'Content-Type', 'text/plain', @@ -47,21 +60,18 @@ sub get_text { # Follow git commit message conventions, # first line is the Subject/title my ($title) = ($txt =~ /\A([^\n]*)/s); - _do_linkify($txt); $ctx->{-title_html} = ascii_html($title); - my $nslash = ($key =~ tr!/!/!); $ctx->{-upfx} = '../../../' . ('../' x $nslash); - - PublicInbox::WwwStream->response($ctx, $code, sub { - my ($nr, undef) = @_; - $nr == 1 ? '
'.$txt.'
' : undef - }); -} - -sub _do_linkify { my $l = PublicInbox::Linkify->new; - $_[0] = $l->linkify_2(ascii_html($l->linkify_1($_[0]))); + $l->linkify_1($txt); + if ($hl) { + $hl->do_hl_text(\$txt); + } else { + $txt = ascii_html($txt); + } + $txt = '
' . $l->linkify_2($txt) . '
'; + PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt); } sub _srch_prefix ($$) { @@ -88,12 +98,303 @@ sub _srch_prefix ($$) { 1; } +sub _colors_help ($$) { + my ($ctx, $txt) = @_; + my $ibx = $ctx->{ibx}; + my $env = $ctx->{env}; + my $base_url = $ibx->base_url($env); + $$txt .= "color customization for $base_url\n"; + $$txt .= <{ibx}->{coderepo} // return; + # note: this doesn't preserve cgitrc layout, since we parse cgitrc + # and drop the original structure + $$txt .= "\tcoderepo = $_\n" for @$cr; + $$txt .= <<'EOF'; + +; `coderepo' entries allows blob reconstruction via patch emails if +; the inbox is indexed with Xapian. `@@ @@' +; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/, +; an HTTP endpoint which reconstructs git blobs via git-apply(1). +EOF + my $pi_cfg = $ctx->{www}->{pi_cfg}; + for my $cr_name (@$cr) { + my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl"); + my $path = "/path/to/$cr_name"; + $cr_name = dq_escape($cr_name); + + $$txt .= qq([coderepo "$cr_name"]\n); + if ($urls && scalar(@$urls)) { + $$txt .= "\t; "; + $$txt .= join(" ||\n\t;\t", map {; + my $dst = $path; + if ($path !~ m![a-z0-9_/\.\-]!i) { + $dst = '"'.dq_escape($dst).'"'; + } + qq(git clone $_ $dst); + } @$urls); + $$txt .= "\n"; + } + $$txt .= "\tdir = $path\n"; + $$txt .= "\tcgiturl = https://example.com/"; + $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n"; + } +} + +# n.b. this is a perfect candidate for memoization +sub inbox_config ($$$) { + my ($ctx, $hdr, $txt) = @_; + my $ibx = $ctx->{ibx}; + push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config'; + my $t = eval { $ibx->mm->created_at }; + push(@$hdr, 'Last-Modified', time2str($t)) if $t; + my $name = dq_escape($ibx->{name}); + my $inboxdir = '/path/to/top-level-inbox'; + my $base_url = $ibx->base_url($ctx->{env}); + $$txt .= <{$k}) or next; + $$txt .= "\t$k = $_\n" for @$v; + } + if (my $altid = $ibx->{altid}) { + my $altid_map = $ibx->altid_map; + $$txt .= <{$k}) or next; + $$txt .= "\t$k = $v\n"; + } + $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)}); + $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)}); + _coderepo_config($ctx, $txt); + 1; +} + +# n.b. this is a perfect candidate for memoization +sub extindex_config ($$$) { + my ($ctx, $hdr, $txt) = @_; + my $ibx = $ctx->{ibx}; + push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config'; + my $name = dq_escape($ibx->{name}); + my $base_url = $ibx->base_url($ctx->{env}); + $$txt .= <{$k}) or next; + $$txt .= "\t$k = $v\n"; + } + _coderepo_config($ctx, $txt); + 1; +} + +sub coderepos_raw ($$) { + my ($ctx, $top_url) = @_; + my $cr = $ctx->{ibx}->{coderepo} // return (); + my $cfg = $ctx->{www}->{pi_cfg}; + my @ret; + for my $cr_name (@$cr) { + $ret[0] //= do { + my $thing = $ctx->{ibx}->can('cloneurl') ? + 'public inbox' : 'external index'; + <get_all("coderepo.$cr_name.cgiturl"); + if ($urls) { + for (@$urls) { + # relative or absolute URL?, prefix relative + # "foo.git" with appropriate number of "../" + my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : + $top_url.$_; + $ret[0] .= "\n\t" . prurl($ctx->{env}, $u); + } + } else { + $ret[0] .= qq[\n\t$cr_name.git (no URL configured)]; + } + } + @ret; # may be empty, this sub is called as an arg for join() +} + +sub _mirror_help ($$) { + my ($ctx, $txt) = @_; + my $ibx = $ctx->{ibx}; + my $base_url = $ibx->base_url($ctx->{env}); + chop $base_url; # no trailing slash for "git clone" + my $dir = (split(m!/!, $base_url))[-1]; + my %seen = ($base_url => 1); + my $top_url = $base_url; + $top_url =~ s!/[^/]+\z!/!; + $$txt .= "public-inbox mirroring instructions\n\n"; + if ($ibx->can('cloneurl')) { # PublicInbox::Inbox + $$txt .= + "This public inbox may be cloned and mirrored by anyone:\n"; + my @urls; + my $max = $ibx->max_git_epoch; + # TODO: some of these URLs may be too long and we may need to + # do something like code_footer() above, but these are local + # admin-defined + if (defined($max)) { # v2 + for my $i (0..$max) { + # old epochs my be deleted: + -d "$ibx->{inboxdir}/git/$i.git" or next; + my $url = "$base_url/$i"; + $seen{$url} = 1; + push @urls, "$url $dir/git/$i.git"; + } + my $nr = scalar(@urls); + if ($nr > 1) { + $$txt .= "\n\t"; + $$txt .= "# this inbox consists of $nr epochs:"; + $urls[0] .= " # oldest"; + $urls[-1] .= " # newest"; + } + } else { # v1 + push @urls, $base_url; + } + # FIXME: epoch splits can be different in other repositories, + # use the "cloneurl" file as-is for now: + for my $u (@{$ibx->cloneurl}) { + next if $seen{$u}++; + push @urls, $u; + } + $$txt .= "\n"; + $$txt .= join('', map { "\tgit clone --mirror $_\n" } @urls); + if (my $addrs = $ibx->{address}) { + $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY'; + my $v = defined $max ? '-V2' : '-V1'; + $$txt .= <{name} $dir/ $base_url \\ + $addrs + public-inbox-index $dir +EOF + } + } else { # PublicInbox::ExtSearch + $$txt .= <{www}->{pi_cfg}->{lc('publicInbox.wwwListing')}; + if (($v // '') =~ /\A(?:all|match=domain)\z/) { + $$txt .= <can('imap_url')) { + my $imap = $ibx->imap_url($ctx); + if (@$imap) { + $$txt .= "\n"; + $$txt .= 'IMAP subfolder(s) available under:'; + $$txt .= "\n\t" . join("\n\t", @$imap) . "\n"; + $$txt .= <can('nntp_url')) { + my $nntp = $ibx->nntp_url($ctx); + if (scalar @$nntp) { + $$txt .= "\n"; + $$txt .= @$nntp == 1 ? 'Newsgroup' : 'Newsgroups are'; + $$txt .= ' available over NNTP:'; + $$txt .= "\n\t" . join("\n\t", @$nntp) . "\n"; + } + } + if ($$txt =~ m!\b[^:]+://\w+\.onion/!) { + $$txt .= <{env}, $PublicInbox::WwwStream::CODE_URL); + $$txt .= join("\n\n", + coderepos_raw($ctx, $top_url), # may be empty + "AGPL code for this site:\n\tgit clone $code_url"); + 1; +} + +sub _default_text ($$$$) { + my ($ctx, $key, $hdr, $txt) = @_; + if ($key eq 'mirror') { + return _mirror_help($ctx, $txt); + } elsif ($key eq 'color') { + return _colors_help($ctx, $txt); + } elsif ($key eq 'config') { + return $ctx->{ibx}->can('cloneurl') ? + inbox_config($ctx, $hdr, $txt) : + extindex_config($ctx, $hdr, $txt); + } -sub _default_text ($$$) { - my ($ctx, $key, $txt) = @_; return if $key ne 'help'; # TODO more keys? - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my $base_url = $ibx->base_url($ctx->{env}); $$txt .= "public-inbox help for $base_url\n"; $$txt .= <search; + my $srch = $ibx->isrch; if ($srch) { $$txt .= <over; + if ($over) { + $$txt .= <