X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWWW.pm;h=7bd29732ddd3d54e165b2910c27e25ef14fab087;hb=7b5ea579e6a9490a4a38958acac8e078d805eec7;hp=da5c1d304f89143a2fedc79e1649c4d521f4ad13;hpb=5bc94392bd67d8e2a919e357d569751b9295475a;p=public-inbox.git diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index da5c1d30..7bd29732 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -1,5 +1,5 @@ -# Copyright (C) 2014-2015 all contributors -# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# Copyright (C) 2014-2018 all contributors +# License: AGPL-3.0+ # # Main web interface for mailing list archives # @@ -13,17 +13,15 @@ package PublicInbox::WWW; use 5.008; use strict; use warnings; -use Plack::Request; use PublicInbox::Config; use PublicInbox::Hval; -use URI::Escape qw(uri_escape_utf8 uri_unescape); -use constant SSOMA_URL => '//ssoma.public-inbox.org/'; -use constant PI_URL => '//public-inbox.org/'; +use URI::Escape qw(uri_unescape); +use PublicInbox::MID qw(mid_escape); require PublicInbox::Git; use PublicInbox::GitHTTPBackend; our $INBOX_RE = qr!\A/([\w\.\-]+)!; our $MID_RE = qr!([^/]+)!; -our $END_RE = qr!(t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; +our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; sub new { @@ -40,27 +38,29 @@ sub run { sub call { my ($self, $env) = @_; - my $cgi = Plack::Request->new($env); - my $ctx = { cgi => $cgi, env => $env, www => $self, - pi_config => $self->{pi_config} }; + my $ctx = { env => $env, www => $self }; # we don't care about multi-value my %qp = map { - my ($k, $v) = split('=', $_, 2); + utf8::decode($_); + my ($k, $v) = split('=', uri_unescape($_), 2); $v = '' unless defined $v; $v =~ tr/+/ /; ($k, $v) - } split(/[&;]/, uri_unescape($env->{QUERY_STRING})); + } split(/[&;]+/, $env->{QUERY_STRING}); $ctx->{qp} = \%qp; my $path_info = $env->{PATH_INFO}; my $method = $env->{REQUEST_METHOD}; - if ($method eq 'POST' && - $path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) { - my $path = $2; - return (invalid_inbox($self, $ctx, $1) || - serve_git($env, $ctx->{git}, $path)); + if ($method eq 'POST') { + if ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?(git-upload-pack)\z!) { + my ($part, $path) = ($2, $3); + return invalid_inbox($ctx, $1) || + serve_git($ctx, $part, $path); + } elsif ($path_info =~ m!$INBOX_RE/!o) { + return invalid_inbox($ctx, $1) || mbox_results($ctx); + } } elsif ($method !~ /\AGET|HEAD\z/) { return r(405, 'Method Not Allowed'); @@ -70,31 +70,30 @@ sub call { if ($path_info eq '/') { r404(); } elsif ($path_info =~ m!$INBOX_RE\z!o) { - invalid_inbox($self, $ctx, $1) || r301($ctx, $1); + invalid_inbox($ctx, $1) || r301($ctx, $1); } elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) { - invalid_inbox($self, $ctx, $1) || get_index($ctx); + invalid_inbox($ctx, $1) || get_index($ctx); } elsif ($path_info =~ m!$INBOX_RE/(?:atom\.xml|new\.atom)\z!o) { - invalid_inbox($self, $ctx, $1) || get_atom($ctx); + invalid_inbox($ctx, $1) || get_atom($ctx); } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) { - invalid_inbox($self, $ctx, $1) || get_new($ctx); - } elsif ($path_info =~ m!$INBOX_RE/ + invalid_inbox($ctx, $1) || get_new($ctx); + } elsif ($path_info =~ m!$INBOX_RE/(?:(\d+)/)? ($PublicInbox::GitHTTPBackend::ANY)\z!ox) { - my $path = $2; - invalid_inbox($self, $ctx, $1) || - serve_git($env, $ctx->{git}, $path); + my ($part, $path) = ($2, $3); + invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path); } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) { - serve_mbox_range($self, $ctx, $1, $2); + serve_mbox_range($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) { - msg_page($self, $ctx, $1, $2, $3); + msg_page($ctx, $1, $2, $3); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) { my ($idx, $fn) = ($3, $4); - invalid_inbox_mid($self, $ctx, $1, $2) || - get_attach($ctx, $idx, $fn); + invalid_inbox_mid($ctx, $1, $2) || get_attach($ctx, $idx, $fn); # in case people leave off the trailing slash: - } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(?:T|T/|t)\z!o) { - my ($inbox, $mid) = ($1, $2); - r301($ctx, $inbox, $mid, 't/#u'); + } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) { + my ($inbox, $mid, $suffix) = ($1, $2, $3); + $suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/'; + r301($ctx, $inbox, $mid, $suffix); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/R/?\z!o) { my ($inbox, $mid) = ($1, $2); @@ -102,13 +101,15 @@ sub call { } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) { r301($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { + get_text($ctx, $1, $2); # convenience redirects order matters } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) { r301($ctx, $1, $2); } else { - legacy_redirects($self, $ctx, $path_info); + legacy_redirects($ctx, $path_info); } } @@ -116,8 +117,8 @@ sub call { sub preload { require PublicInbox::Feed; require PublicInbox::View; - require PublicInbox::Thread; - require Email::MIME; + require PublicInbox::SearchThread; + require PublicInbox::MIME; require Digest::SHA; require POSIX; @@ -144,16 +145,13 @@ sub r404 { sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } # returns undef if valid, array ref response if invalid -sub invalid_inbox { - my ($self, $ctx, $inbox) = @_; - my $obj = $ctx->{pi_config}->lookup_name($inbox); +sub invalid_inbox ($$) { + my ($ctx, $inbox) = @_; + my $www = $ctx->{www}; + my $obj = $www->{pi_config}->lookup_name($inbox); if (defined $obj) { - $ctx->{git_dir} = $obj->{mainrepo}; $ctx->{git} = $obj->git; - # for PublicInbox::HTTP::weaken_task: - $ctx->{cgi}->{env}->{'pi-httpd.inbox'} = $obj; $ctx->{-inbox} = $obj; - $ctx->{inbox} = $inbox; return; } @@ -161,16 +159,16 @@ sub invalid_inbox { # generation and link things intended for nntp:// to https?://, # so try to infer links and redirect them to the appropriate # list URL. - $self->news_www->call($ctx->{cgi}->{env}); + $www->news_www->call($ctx->{env}); } # returns undef if valid, array ref response if invalid sub invalid_inbox_mid { - my ($self, $ctx, $inbox, $mid) = @_; - my $ret = invalid_inbox($self, $ctx, $inbox); + my ($ctx, $inbox, $mid) = @_; + my $ret = invalid_inbox($ctx, $inbox); return $ret if $ret; - $ctx->{mid} = $mid = uri_unescape($mid); + $ctx->{mid} = $mid; if ($mid =~ /\A[a-f0-9]{40}\z/) { # this is horiffically wasteful for legacy URLs: if ($mid = mid2blob($ctx)) { @@ -201,8 +199,7 @@ sub get_new { sub get_index { my ($ctx) = @_; require PublicInbox::Feed; - my $srch = searcher($ctx); - footer($ctx); + searcher($ctx); if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { require PublicInbox::SearchView; PublicInbox::SearchView::sres_top_html($ctx); @@ -220,33 +217,39 @@ sub mid2blob { # /$INBOX/$MESSAGE_ID/raw -> raw mbox sub get_mid_txt { my ($ctx) = @_; - my $x = mid2blob($ctx) or return r404($ctx); require PublicInbox::Mbox; - PublicInbox::Mbox::emit1($ctx, $x); + PublicInbox::Mbox::emit_raw($ctx) || r404($ctx); } # /$INBOX/$MESSAGE_ID/ -> HTML content (short quotes) sub get_mid_html { my ($ctx) = @_; - my $x = mid2blob($ctx) or return r404($ctx); - require PublicInbox::View; - my $foot = footer($ctx); - require Email::MIME; - my $mime = Email::MIME->new($x); searcher($ctx); - [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], - PublicInbox::View::msg_html($ctx, $mime, $foot) ]; + PublicInbox::View::msg_page($ctx) || r404($ctx); } # /$INBOX/$MESSAGE_ID/t/ sub get_thread { - my ($ctx) = @_; + my ($ctx, $flat) = @_; searcher($ctx) or return need_search($ctx); + $ctx->{flat} = $flat; require PublicInbox::View; PublicInbox::View::thread_html($ctx); } +# /$INBOX/_/text/$KEY/ +# /$INBOX/_/text/$KEY/raw +# KEY may contain slashes +sub get_text { + my ($ctx, $inbox, $key) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + + require PublicInbox::WwwText; + PublicInbox::WwwText::get_text($ctx, $key); +} + sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; @@ -254,43 +257,6 @@ sub ctx_get { $val; } -sub footer { - my ($ctx) = @_; - return '' unless $ctx; - my $obj = $ctx->{-inbox} or return ''; - - # auto-generate a footer - chomp(my $desc = $obj->description); - $desc = PublicInbox::Hval::ascii_html($desc); - - my $urls; - my @urls = @{$obj->cloneurl}; - my %seen = map { $_ => 1 } @urls; - my $cgi = $ctx->{cgi}; - my $http = $cgi->base->as_string . $obj->{name}; - $seen{$http} or unshift @urls, $http; - my $ssoma_url = PublicInbox::Hval::prurl($ctx->{env}, SSOMA_URL); - if (scalar(@urls) == 1) { - $urls = "URL for ssoma or git clone --mirror $urls[0]); - } else { - $urls = "URLs for ssoma or git clone --mirror\n) . - join("\n", map { "\tgit clone --mirror $_" } @urls); - } - - my $addr = $obj->{-primary_address}; - $ctx->{footer} = join("\n", - '- ' . $desc, - "A {cgi}->{env}, PI_URL) . - '">public-inbox, ' . - 'anybody may post in plain-text (not HTML):', - $addr, - $urls - ); -} - # search support is optional, returns undef if Xapian is not installed # or not configured for the given GIT_DIR sub searcher { @@ -328,13 +294,12 @@ sub get_thread_mbox { sub get_thread_atom { my ($ctx) = @_; searcher($ctx) or return need_search($ctx); - $ctx->{self_url} = $ctx->{cgi}->uri->as_string; require PublicInbox::Feed; PublicInbox::Feed::generate_thread_atom($ctx); } sub legacy_redirects { - my ($self, $ctx, $path_info) = @_; + my ($ctx, $path_info) = @_; # single-message pages if ($path_info =~ m!$INBOX_RE/m/(\S+)/\z!o) { @@ -379,7 +344,7 @@ sub legacy_redirects { # some Message-IDs have slashes in them and the HTTP server # may try to be clever and unescape them :< } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/$END_RE\z!o) { - msg_page($self, $ctx, $1, $2, $3); + msg_page($ctx, $1, $2, $3); # in case people leave off the trailing slash: } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/(T|t)\z!o) { @@ -387,22 +352,21 @@ sub legacy_redirects { } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) { r301($ctx, $1, $2); } else { - $self->news_www->call($ctx->{cgi}->{env}); + $ctx->{www}->news_www->call($ctx->{env}); } } sub r301 { my ($ctx, $inbox, $mid, $suffix) = @_; - my $cgi = $ctx->{cgi}; my $obj = $ctx->{-inbox}; unless ($obj) { - my $r404 = invalid_inbox($ctx->{www}, $ctx, $inbox); + my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; $obj = $ctx->{-inbox}; } - my $url = $obj->base_url($cgi); + my $url = $obj->base_url($ctx->{env}); my $qs = $ctx->{env}->{QUERY_STRING}; - $url .= (uri_escape_utf8($mid) . '/') if (defined $mid); + $url .= (mid_escape($mid) . '/') if (defined $mid); $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; @@ -412,10 +376,11 @@ sub r301 { } sub msg_page { - my ($self, $ctx, $inbox, $mid, $e) = @_; + my ($ctx, $inbox, $mid, $e) = @_; my $ret; - $ret = invalid_inbox_mid($self, $ctx, $inbox, $mid) and return $ret; + $ret = invalid_inbox_mid($ctx, $inbox, $mid) and return $ret; '' eq $e and return get_mid_html($ctx); + 'T/' eq $e and return get_thread($ctx, 1); 't/' eq $e and return get_thread($ctx); 't.atom' eq $e and return get_thread_atom($ctx); 't.mbox' eq $e and return get_thread_mbox($ctx); @@ -428,13 +393,26 @@ sub msg_page { } sub serve_git { - my ($env, $git, $path) = @_; - PublicInbox::GitHTTPBackend::serve($env, $git, $path); + my ($ctx, $part, $path) = @_; + my $env = $ctx->{env}; + my $ibx = $ctx->{-inbox}; + my $git = defined $part ? $ibx->git_part($part) : $ibx->git; + $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404(); +} + +sub mbox_results { + my ($ctx) = @_; + if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { + searcher($ctx) or return need_search($ctx); + require PublicInbox::SearchView; + return PublicInbox::SearchView::mbox_results($ctx); + } + r404(); } sub serve_mbox_range { - my ($self, $ctx, $inbox, $range) = @_; - invalid_inbox($self, $ctx, $inbox) || eval { + my ($ctx, $inbox, $range) = @_; + invalid_inbox($ctx, $inbox) || eval { require PublicInbox::Mbox; searcher($ctx); PublicInbox::Mbox::emit_range($ctx, $range); @@ -443,10 +421,10 @@ sub serve_mbox_range { sub news_www { my ($self) = @_; - my $nw = $self->{news_www}; - return $nw if $nw; - require PublicInbox::NewsWWW; - $self->{news_www} = PublicInbox::NewsWWW->new($self->{pi_config}); + $self->{news_www} ||= do { + require PublicInbox::NewsWWW; + PublicInbox::NewsWWW->new($self->{pi_config}); + } } sub get_attach {