X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWWW.pm;h=2434f2f581b2b0fbd1d48d1a08881bf0a1c1187b;hb=31ec75b69e02df6330bc16dd2bd48d354726f4c1;hp=8e1b1afe9c6c6872be0fa8fc9f195a81ad75a3a8;hpb=0b1de991a099b5e8b9a9e3e85b5eaaacc9362dbb;p=public-inbox.git diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 8e1b1afe..2434f2f5 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2018 all contributors +# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ # # Main web interface for mailing list archives @@ -15,20 +15,19 @@ use 5.010_001; use strict; use warnings; use bytes (); # only for bytes::length -use Plack::Util; use PublicInbox::Config; use PublicInbox::Hval; use URI::Escape qw(uri_unescape); use PublicInbox::MID qw(mid_escape); -require PublicInbox::Git; use PublicInbox::GitHTTPBackend; use PublicInbox::UserContent; +use PublicInbox::WwwStatic qw(r path_info_raw); # TODO: consider a routing tree now that we have more endpoints: our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; -our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; +our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!; our $OID_RE = qr![a-f0-9]{7,40}!; sub new { @@ -43,51 +42,39 @@ sub run { PublicInbox::WWW->new->call($req->env); } -my %path_re_cache; - -sub path_re ($) { - my $sn = $_[0]->{SCRIPT_NAME}; - $path_re_cache{$sn} ||= do { - $sn = '/'.$sn unless index($sn, '/') == 0; - $sn =~ s!/\z!!; - qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!; - }; -} - sub call { my ($self, $env) = @_; my $ctx = { env => $env, www => $self }; # we don't care about multi-value - my %qp = map { + %{$ctx->{qp}} = map { utf8::decode($_); - my ($k, $v) = split('=', uri_unescape($_), 2); - $v = '' unless defined $v; - $v =~ tr/+/ /; - ($k, $v) + tr/+/ /; + my ($k, $v) = split('=', $_, 2); + $v = uri_unescape($v // ''); + # none of the keys we care about will need escaping + $k => $v; } split(/[&;]+/, $env->{QUERY_STRING}); - $ctx->{qp} = \%qp; - # avoiding $env->{PATH_INFO} here since that's already decoded - my ($path_info) = ($env->{REQUEST_URI} =~ path_re($env)); - $path_info //= $env->{PATH_INFO}; + my $path_info = path_info_raw($env); my $method = $env->{REQUEST_METHOD}; if ($method eq 'POST') { - if ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?(git-upload-pack)\z!) { - my ($part, $path) = ($2, $3); + if ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)? + (git-upload-pack)\z!x) { + my ($epoch, $path) = ($2, $3); return invalid_inbox($ctx, $1) || - serve_git($ctx, $part, $path); + serve_git($ctx, $epoch, $path); } elsif ($path_info =~ m!$INBOX_RE/!o) { return invalid_inbox($ctx, $1) || mbox_results($ctx); } } - elsif ($method !~ /\AGET|HEAD\z/) { - return r(405, 'Method Not Allowed'); + elsif ($method !~ /\A(?:GET|HEAD)\z/) { + return r(405); } # top-level indices and feeds - if ($path_info eq '/') { + if ($path_info eq '/' || $path_info eq '/manifest.js.gz') { www_listing($self)->call($env); } elsif ($path_info =~ m!$INBOX_RE\z!o) { invalid_inbox($ctx, $1) || r301($ctx, $1); @@ -97,11 +84,13 @@ sub call { invalid_inbox($ctx, $1) || get_atom($ctx); } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) { invalid_inbox($ctx, $1) || get_new($ctx); - } elsif ($path_info =~ m!$INBOX_RE/(?:(\d+)/)? + } elsif ($path_info =~ m!$INBOX_RE/description\z!o) { + get_description($ctx, $1); + } elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)? ($PublicInbox::GitHTTPBackend::ANY)\z!ox) { - my ($part, $path) = ($2, $3); - invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path); - } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) { + my ($epoch, $path) = ($2, $3); + invalid_inbox($ctx, $1) || serve_git($ctx, $epoch, $path); + } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-]+).mbox\.gz\z!o) { serve_mbox_range($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) { msg_page($ctx, $1, $2, $3); @@ -123,11 +112,14 @@ sub call { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { get_text($ctx, $1, $2); - } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) { + } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-\.]+)\.css\z!o) { get_css($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/manifest\.js\.gz\z!o) { + get_inbox_manifest($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) { get_vcs_object($ctx, $1, $2); - } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) { + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/ + ($PublicInbox::Hval::FN)\z!ox) { get_vcs_object($ctx, $1, $2, $3); } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) { r301($ctx, $1, $2, 's/'); @@ -140,31 +132,47 @@ sub call { } } -# for CoW-friendliness, MOOOOO! +# for CoW-friendliness, MOOOOO! Even for single-process setups, +# we want to get all immortal allocations done early to avoid heap +# fragmentation since common allocators favor a large contiguous heap. sub preload { my ($self) = @_; + require PublicInbox::ExtMsg; require PublicInbox::Feed; require PublicInbox::View; require PublicInbox::SearchThread; require PublicInbox::MIME; - require Digest::SHA; - require POSIX; + require PublicInbox::Mbox; + require PublicInbox::ViewVCS; + require PublicInbox::WwwText; + require PublicInbox::WwwAttach; eval { require PublicInbox::Search; PublicInbox::Search::load_xapian(); }; - foreach (qw(PublicInbox::SearchView - PublicInbox::Mbox IO::Compress::Gzip - PublicInbox::NewsWWW)) { + foreach (qw(PublicInbox::SearchView PublicInbox::MboxGz)) { eval "require $_;"; } if (ref($self)) { + my $pi_config = $self->{pi_config}; + if (defined($pi_config->{'publicinbox.cgitrc'})) { + $pi_config->limiter('-cgit'); + } $self->cgit; $self->stylesheets_prepare($_) for ('', '../', '../../'); $self->www_listing; + $self->news_www; + $pi_config->each_inbox(\&preload_inbox); } } +sub preload_inbox { + my $ibx = shift; + $ibx->cloneurl; + $ibx->description; + $ibx->base_url; +} + # private functions below sub r404 { @@ -173,12 +181,9 @@ sub r404 { require PublicInbox::ExtMsg; return PublicInbox::ExtMsg::ext_msg($ctx); } - r(404, 'Not Found'); + r(404); } -# simple response for errors -sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } - sub news_cgit_fallback ($) { my ($ctx) = @_; my $www = $ctx->{www}; @@ -296,13 +301,6 @@ sub get_vcs_object ($$$;$) { PublicInbox::ViewVCS::show($ctx, $oid, $filename); } -sub ctx_get { - my ($ctx, $key) = @_; - my $val = $ctx->{$key}; - (defined $val && $val ne '') or die "BUG: bad ctx, $key unusable"; - $val; -} - sub need { my ($ctx, $extra) = @_; my $msg = <{env}; my $ibx = $ctx->{-inbox}; - my $git = defined $part ? $ibx->git_part($part) : $ibx->git; + my $git = defined $epoch ? $ibx->git_epoch($epoch) : $ibx->git; $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404(); } @@ -475,6 +473,7 @@ sub cgit { require PublicInbox::Cgit; PublicInbox::Cgit->new($pi_config); } else { + require Plack::Util; Plack::Util::inline_object(call => sub { r404() }); } } @@ -488,6 +487,15 @@ sub www_listing { } } +# GET $INBOX/manifest.js.gz +sub get_inbox_manifest ($$$) { + my ($ctx, $inbox, $key) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + require PublicInbox::WwwListing; + PublicInbox::WwwListing::js($ctx->{env}, [$ctx->{-inbox}]); +} + sub get_attach { my ($ctx, $idx, $fn) = @_; require PublicInbox::WwwAttach; @@ -534,11 +542,15 @@ sub stylesheets_prepare ($$) { $inline_ok = 0; } else { my $fn = $_; + my ($key) = (m!([^/]+?)(?:\.css)?\z!i); + if ($key !~ /\A[a-zA-Z0-9_\-\.]+\z/) { + warn "ignoring $fn, non-ASCII word character\n"; + next; + } open(my $fh, '<', $fn) or do { warn "failed to open $fn: $!\n"; next; }; - my ($key) = (m!([^/]+?)(?:\.css)?\z!i); my $ctime = 0; my $local = do { local $/; <$fh> }; if ($local =~ /\S/) { @@ -626,4 +638,13 @@ sub get_css ($$$) { [ 200, $h, [ $css ] ]; } +sub get_description { + my ($ctx, $inbox) = @_; + invalid_inbox($ctx, $inbox) || do { + my $d = $ctx->{-inbox}->description . "\n"; + [ 200, [ 'Content-Length', bytes::length($d), + 'Content-Type', 'text/plain' ], [ $d ] ]; + }; +} + 1;