X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWWW.pm;h=4ddc187b7b46e9e6b928a200d1fb1c7109a1d2ce;hb=3d41aa23f35501ca92aab8aa42980fa73f7fa74f;hp=b602206d1cfa3a683667060125081e35b94a59d0;hpb=8d479224468ad9c8053ed80cd3532b8d9343e653;p=public-inbox.git diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index b602206d..4ddc187b 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -1,5 +1,5 @@ -# Copyright (C) 2014-2015 all contributors -# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# Copyright (C) 2014-2018 all contributors +# License: AGPL-3.0+ # # Main web interface for mailing list archives # @@ -15,7 +15,8 @@ use strict; use warnings; use PublicInbox::Config; use PublicInbox::Hval; -use URI::Escape qw(uri_escape_utf8 uri_unescape); +use URI::Escape qw(uri_unescape); +use PublicInbox::MID qw(mid_escape); require PublicInbox::Git; use PublicInbox::GitHTTPBackend; our $INBOX_RE = qr!\A/([\w\.\-]+)!; @@ -37,25 +38,29 @@ sub run { sub call { my ($self, $env) = @_; - my $ctx = { env => $env, www => $self, pi_config => $self->{pi_config} }; + my $ctx = { env => $env, www => $self }; # we don't care about multi-value my %qp = map { - my ($k, $v) = split('=', $_, 2); + utf8::decode($_); + my ($k, $v) = split('=', uri_unescape($_), 2); $v = '' unless defined $v; $v =~ tr/+/ /; ($k, $v) - } split(/[&;]/, uri_unescape($env->{QUERY_STRING})); + } split(/[&;]+/, $env->{QUERY_STRING}); $ctx->{qp} = \%qp; my $path_info = $env->{PATH_INFO}; my $method = $env->{REQUEST_METHOD}; - if ($method eq 'POST' && - $path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) { - my $path = $2; - return (invalid_inbox($self, $ctx, $1) || - serve_git($env, $ctx->{git}, $path)); + if ($method eq 'POST') { + if ($path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) { + my $path = $2; + return invalid_inbox($ctx, $1) || + serve_git($ctx, $path); + } elsif ($path_info =~ m!$INBOX_RE/!o) { + return invalid_inbox($ctx, $1) || mbox_results($ctx); + } } elsif ($method !~ /\AGET|HEAD\z/) { return r(405, 'Method Not Allowed'); @@ -65,27 +70,25 @@ sub call { if ($path_info eq '/') { r404(); } elsif ($path_info =~ m!$INBOX_RE\z!o) { - invalid_inbox($self, $ctx, $1) || r301($ctx, $1); + invalid_inbox($ctx, $1) || r301($ctx, $1); } elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) { - invalid_inbox($self, $ctx, $1) || get_index($ctx); + invalid_inbox($ctx, $1) || get_index($ctx); } elsif ($path_info =~ m!$INBOX_RE/(?:atom\.xml|new\.atom)\z!o) { - invalid_inbox($self, $ctx, $1) || get_atom($ctx); + invalid_inbox($ctx, $1) || get_atom($ctx); } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) { - invalid_inbox($self, $ctx, $1) || get_new($ctx); + invalid_inbox($ctx, $1) || get_new($ctx); } elsif ($path_info =~ m!$INBOX_RE/ ($PublicInbox::GitHTTPBackend::ANY)\z!ox) { my $path = $2; - invalid_inbox($self, $ctx, $1) || - serve_git($env, $ctx->{git}, $path); + invalid_inbox($ctx, $1) || serve_git($ctx, $path); } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) { - serve_mbox_range($self, $ctx, $1, $2); + serve_mbox_range($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) { - msg_page($self, $ctx, $1, $2, $3); + msg_page($ctx, $1, $2, $3); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) { my ($idx, $fn) = ($3, $4); - invalid_inbox_mid($self, $ctx, $1, $2) || - get_attach($ctx, $idx, $fn); + invalid_inbox_mid($ctx, $1, $2) || get_attach($ctx, $idx, $fn); # in case people leave off the trailing slash: } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) { my ($inbox, $mid, $suffix) = ($1, $2, $3); @@ -98,13 +101,15 @@ sub call { } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) { r301($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { + get_text($ctx, $1, $2); # convenience redirects order matters } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) { r301($ctx, $1, $2); } else { - legacy_redirects($self, $ctx, $path_info); + legacy_redirects($ctx, $path_info); } } @@ -112,8 +117,8 @@ sub call { sub preload { require PublicInbox::Feed; require PublicInbox::View; - require PublicInbox::Thread; - require Email::MIME; + require PublicInbox::SearchThread; + require PublicInbox::MIME; require Digest::SHA; require POSIX; @@ -140,9 +145,10 @@ sub r404 { sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } # returns undef if valid, array ref response if invalid -sub invalid_inbox { - my ($self, $ctx, $inbox) = @_; - my $obj = $ctx->{pi_config}->lookup_name($inbox); +sub invalid_inbox ($$) { + my ($ctx, $inbox) = @_; + my $www = $ctx->{www}; + my $obj = $www->{pi_config}->lookup_name($inbox); if (defined $obj) { $ctx->{git_dir} = $obj->{mainrepo}; $ctx->{git} = $obj->git; @@ -155,16 +161,16 @@ sub invalid_inbox { # generation and link things intended for nntp:// to https?://, # so try to infer links and redirect them to the appropriate # list URL. - $self->news_www->call($ctx->{env}); + $www->news_www->call($ctx->{env}); } # returns undef if valid, array ref response if invalid sub invalid_inbox_mid { - my ($self, $ctx, $inbox, $mid) = @_; - my $ret = invalid_inbox($self, $ctx, $inbox); + my ($ctx, $inbox, $mid) = @_; + my $ret = invalid_inbox($ctx, $inbox); return $ret if $ret; - $ctx->{mid} = $mid = uri_unescape($mid); + $ctx->{mid} = $mid; if ($mid =~ /\A[a-f0-9]{40}\z/) { # this is horiffically wasteful for legacy URLs: if ($mid = mid2blob($ctx)) { @@ -195,7 +201,7 @@ sub get_new { sub get_index { my ($ctx) = @_; require PublicInbox::Feed; - my $srch = searcher($ctx); + searcher($ctx); if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { require PublicInbox::SearchView; PublicInbox::SearchView::sres_top_html($ctx); @@ -224,8 +230,8 @@ sub get_mid_html { my $x = mid2blob($ctx) or return r404($ctx); require PublicInbox::View; - require Email::MIME; - my $mime = Email::MIME->new($x); + require PublicInbox::MIME; + my $mime = PublicInbox::MIME->new($x); searcher($ctx); PublicInbox::View::msg_html($ctx, $mime); } @@ -239,6 +245,18 @@ sub get_thread { PublicInbox::View::thread_html($ctx); } +# /$INBOX/_/text/$KEY/ +# /$INBOX/_/text/$KEY/raw +# KEY may contain slashes +sub get_text { + my ($ctx, $inbox, $key) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + + require PublicInbox::WwwText; + PublicInbox::WwwText::get_text($ctx, $key); +} + sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; @@ -288,7 +306,7 @@ sub get_thread_atom { } sub legacy_redirects { - my ($self, $ctx, $path_info) = @_; + my ($ctx, $path_info) = @_; # single-message pages if ($path_info =~ m!$INBOX_RE/m/(\S+)/\z!o) { @@ -333,7 +351,7 @@ sub legacy_redirects { # some Message-IDs have slashes in them and the HTTP server # may try to be clever and unescape them :< } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/$END_RE\z!o) { - msg_page($self, $ctx, $1, $2, $3); + msg_page($ctx, $1, $2, $3); # in case people leave off the trailing slash: } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/(T|t)\z!o) { @@ -341,7 +359,7 @@ sub legacy_redirects { } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) { r301($ctx, $1, $2); } else { - $self->news_www->call($ctx->{env}); + $ctx->{www}->news_www->call($ctx->{env}); } } @@ -349,13 +367,13 @@ sub r301 { my ($ctx, $inbox, $mid, $suffix) = @_; my $obj = $ctx->{-inbox}; unless ($obj) { - my $r404 = invalid_inbox($ctx->{www}, $ctx, $inbox); + my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; $obj = $ctx->{-inbox}; } my $url = $obj->base_url($ctx->{env}); my $qs = $ctx->{env}->{QUERY_STRING}; - $url .= (uri_escape_utf8($mid) . '/') if (defined $mid); + $url .= (mid_escape($mid) . '/') if (defined $mid); $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; @@ -365,9 +383,9 @@ sub r301 { } sub msg_page { - my ($self, $ctx, $inbox, $mid, $e) = @_; + my ($ctx, $inbox, $mid, $e) = @_; my $ret; - $ret = invalid_inbox_mid($self, $ctx, $inbox, $mid) and return $ret; + $ret = invalid_inbox_mid($ctx, $inbox, $mid) and return $ret; '' eq $e and return get_mid_html($ctx); 'T/' eq $e and return get_thread($ctx, 1); 't/' eq $e and return get_thread($ctx); @@ -382,13 +400,23 @@ sub msg_page { } sub serve_git { - my ($env, $git, $path) = @_; - PublicInbox::GitHTTPBackend::serve($env, $git, $path); + my ($ctx, $path) = @_; + PublicInbox::GitHTTPBackend::serve($ctx->{env}, $ctx->{git}, $path); +} + +sub mbox_results { + my ($ctx) = @_; + if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { + searcher($ctx) or return need_search($ctx); + require PublicInbox::SearchView; + return PublicInbox::SearchView::mbox_results($ctx); + } + r404(); } sub serve_mbox_range { - my ($self, $ctx, $inbox, $range) = @_; - invalid_inbox($self, $ctx, $inbox) || eval { + my ($ctx, $inbox, $range) = @_; + invalid_inbox($ctx, $inbox) || eval { require PublicInbox::Mbox; searcher($ctx); PublicInbox::Mbox::emit_range($ctx, $range); @@ -397,10 +425,10 @@ sub serve_mbox_range { sub news_www { my ($self) = @_; - my $nw = $self->{news_www}; - return $nw if $nw; - require PublicInbox::NewsWWW; - $self->{news_www} = PublicInbox::NewsWWW->new($self->{pi_config}); + $self->{news_www} ||= do { + require PublicInbox::NewsWWW; + PublicInbox::NewsWWW->new($self->{pi_config}); + } } sub get_attach {