X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWWW.pm;h=8e1b1afe9c6c6872be0fa8fc9f195a81ad75a3a8;hb=0b1de991a099b5e8b9a9e3e85b5eaaacc9362dbb;hp=f5ed271e3af064b7d7aa0fd781de55570cb6c9ec;hpb=d0a0d7a3c59c512f3762e8850bdacd8a4395bae9;p=public-inbox.git diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index f5ed271e..8e1b1afe 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -6,23 +6,30 @@ # We focus on the lowest common denominators here: # - targeted at text-only console browsers (w3m, links, etc..) # - Only basic HTML, CSS only for line-wrapping
text content for GUIs +# and diff/syntax-highlighting (optional) # - No JavaScript, graphics or icons allowed. # - Must not rely on static content # - UTF-8 is only for user-content, 7-bit US-ASCII for us package PublicInbox::WWW; -use 5.008; +use 5.010_001; use strict; use warnings; +use bytes (); # only for bytes::length +use Plack::Util; use PublicInbox::Config; use PublicInbox::Hval; use URI::Escape qw(uri_unescape); use PublicInbox::MID qw(mid_escape); require PublicInbox::Git; use PublicInbox::GitHTTPBackend; -our $INBOX_RE = qr!\A/([\w\.\-]+)!; +use PublicInbox::UserContent; + +# TODO: consider a routing tree now that we have more endpoints: +our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; +our $OID_RE = qr![a-f0-9]{7,40}!; sub new { my ($class, $pi_config) = @_; @@ -36,6 +43,17 @@ sub run { PublicInbox::WWW->new->call($req->env); } +my %path_re_cache; + +sub path_re ($) { + my $sn = $_[0]->{SCRIPT_NAME}; + $path_re_cache{$sn} ||= do { + $sn = '/'.$sn unless index($sn, '/') == 0; + $sn =~ s!/\z!!; + qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!; + }; +} + sub call { my ($self, $env) = @_; my $ctx = { env => $env, www => $self }; @@ -50,14 +68,16 @@ sub call { } split(/[&;]+/, $env->{QUERY_STRING}); $ctx->{qp} = \%qp; - my $path_info = $env->{PATH_INFO}; + # avoiding $env->{PATH_INFO} here since that's already decoded + my ($path_info) = ($env->{REQUEST_URI} =~ path_re($env)); + $path_info //= $env->{PATH_INFO}; my $method = $env->{REQUEST_METHOD}; if ($method eq 'POST') { - if ($path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) { - my $path = $2; + if ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?(git-upload-pack)\z!) { + my ($part, $path) = ($2, $3); return invalid_inbox($ctx, $1) || - serve_git($ctx, $path); + serve_git($ctx, $part, $path); } elsif ($path_info =~ m!$INBOX_RE/!o) { return invalid_inbox($ctx, $1) || mbox_results($ctx); } @@ -68,7 +88,7 @@ sub call { # top-level indices and feeds if ($path_info eq '/') { - r404(); + www_listing($self)->call($env); } elsif ($path_info =~ m!$INBOX_RE\z!o) { invalid_inbox($ctx, $1) || r301($ctx, $1); } elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) { @@ -77,10 +97,10 @@ sub call { invalid_inbox($ctx, $1) || get_atom($ctx); } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) { invalid_inbox($ctx, $1) || get_new($ctx); - } elsif ($path_info =~ m!$INBOX_RE/ + } elsif ($path_info =~ m!$INBOX_RE/(?:(\d+)/)? ($PublicInbox::GitHTTPBackend::ANY)\z!ox) { - my $path = $2; - invalid_inbox($ctx, $1) || serve_git($ctx, $path); + my ($part, $path) = ($2, $3); + invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path); } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) { serve_mbox_range($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) { @@ -91,19 +111,26 @@ sub call { invalid_inbox_mid($ctx, $1, $2) || get_attach($ctx, $idx, $fn); # in case people leave off the trailing slash: } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) { - my ($inbox, $mid, $suffix) = ($1, $2, $3); + my ($inbox, $mid_ue, $suffix) = ($1, $2, $3); $suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/'; - r301($ctx, $inbox, $mid, $suffix); + r301($ctx, $inbox, $mid_ue, $suffix); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/R/?\z!o) { - my ($inbox, $mid) = ($1, $2); - r301($ctx, $inbox, $mid, '#R'); + my ($inbox, $mid_ue) = ($1, $2); + r301($ctx, $inbox, $mid_ue, '#R'); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { get_text($ctx, $1, $2); - + } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) { + get_css($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) { + get_vcs_object($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) { + get_vcs_object($ctx, $1, $2, $3); + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) { + r301($ctx, $1, $2, 's/'); # convenience redirects order matters } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) { r301($ctx, $1, $2); @@ -115,18 +142,27 @@ sub call { # for CoW-friendliness, MOOOOO! sub preload { + my ($self) = @_; require PublicInbox::Feed; require PublicInbox::View; require PublicInbox::SearchThread; require PublicInbox::MIME; require Digest::SHA; require POSIX; - - foreach (qw(PublicInbox::Search PublicInbox::SearchView + eval { + require PublicInbox::Search; + PublicInbox::Search::load_xapian(); + }; + foreach (qw(PublicInbox::SearchView PublicInbox::Mbox IO::Compress::Gzip PublicInbox::NewsWWW)) { eval "require $_;"; } + if (ref($self)) { + $self->cgit; + $self->stylesheets_prepare($_) for ('', '../', '../../'); + $self->www_listing; + } } # private functions below @@ -135,7 +171,6 @@ sub r404 { my ($ctx) = @_; if ($ctx && $ctx->{mid}) { require PublicInbox::ExtMsg; - searcher($ctx); return PublicInbox::ExtMsg::ext_msg($ctx); } r(404, 'Not Found'); @@ -144,14 +179,20 @@ sub r404 { # simple response for errors sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } +sub news_cgit_fallback ($) { + my ($ctx) = @_; + my $www = $ctx->{www}; + my $env = $ctx->{env}; + my $res = $www->news_www->call($env); + $res->[0] == 404 ? $www->cgit->call($env) : $res; +} + # returns undef if valid, array ref response if invalid sub invalid_inbox ($$) { my ($ctx, $inbox) = @_; - my $www = $ctx->{www}; - my $obj = $www->{pi_config}->lookup_name($inbox); - if (defined $obj) { - $ctx->{git} = $obj->git; - $ctx->{-inbox} = $obj; + my $ibx = $ctx->{www}->{pi_config}->lookup_name($inbox); + if (defined $ibx) { + $ctx->{-inbox} = $ibx; return; } @@ -159,24 +200,25 @@ sub invalid_inbox ($$) { # generation and link things intended for nntp:// to https?://, # so try to infer links and redirect them to the appropriate # list URL. - $www->news_www->call($ctx->{env}); + news_cgit_fallback($ctx); } # returns undef if valid, array ref response if invalid sub invalid_inbox_mid { - my ($ctx, $inbox, $mid) = @_; + my ($ctx, $inbox, $mid_ue) = @_; my $ret = invalid_inbox($ctx, $inbox); return $ret if $ret; - $ctx->{mid} = $mid; - if ($mid =~ /\A[a-f0-9]{40}\z/) { - # this is horiffically wasteful for legacy URLs: - if ($mid = mid2blob($ctx)) { - require Email::Simple; - use PublicInbox::MID qw/mid_clean/; - my $s = Email::Simple->new($mid); - $ctx->{mid} = mid_clean($s->header('Message-ID')); - } + my $mid = $ctx->{mid} = uri_unescape($mid_ue); + my $ibx = $ctx->{-inbox}; + if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) { + my ($x2, $x38) = ($1, $2); + # this is horrifically wasteful for legacy URLs: + my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return; + require Email::Simple; + my $s = Email::Simple->new($str); + $mid = PublicInbox::MID::mid_clean($s->header('Message-ID')); + return r301($ctx, $inbox, mid_escape($mid)); } undef; } @@ -199,7 +241,6 @@ sub get_new { sub get_index { my ($ctx) = @_; require PublicInbox::Feed; - searcher($ctx); if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { require PublicInbox::SearchView; PublicInbox::SearchView::sres_top_html($ctx); @@ -208,12 +249,6 @@ sub get_index { } } -# just returns a string ref for the blob in the current ctx -sub mid2blob { - my ($ctx) = @_; - $ctx->{-inbox}->msg_by_mid($ctx->{mid}); -} - # /$INBOX/$MESSAGE_ID/raw -> raw mbox sub get_mid_txt { my ($ctx) = @_; @@ -231,7 +266,7 @@ sub get_mid_html { # /$INBOX/$MESSAGE_ID/t/ sub get_thread { my ($ctx, $flat) = @_; - searcher($ctx) or return need_search($ctx); + $ctx->{-inbox}->over or return need($ctx, 'Overview'); $ctx->{flat} = $flat; require PublicInbox::View; PublicInbox::View::thread_html($ctx); @@ -249,6 +284,18 @@ sub get_text { PublicInbox::WwwText::get_text($ctx, $key); } +# show git objects (blobs and commits) +# /$INBOX/_/$OBJECT_ID/show +# /$INBOX/_/${OBJECT_ID}_${FILENAME} +# KEY may contain slashes +sub get_vcs_object ($$$;$) { + my ($ctx, $inbox, $oid, $filename) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + require PublicInbox::ViewVCS; + PublicInbox::ViewVCS::show($ctx, $oid, $filename); +} + sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; @@ -256,21 +303,11 @@ sub ctx_get { $val; } -# search support is optional, returns undef if Xapian is not installed -# or not configured for the given GIT_DIR -sub searcher { - my ($ctx) = @_; - eval { - require PublicInbox::Search; - $ctx->{srch} = $ctx->{-inbox}->search; - }; -} - -sub need_search { - my ($ctx) = @_; +sub need { + my ($ctx, $extra) = @_; my $msg = <Search not available for this -public-inbox Search is not available for this public-inbox +$extra not available for this +public-inbox $extra is not available for this public-inbox Return to indexEOF [ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ]; @@ -283,16 +320,16 @@ EOF # especially on older systems. Stick to zlib since that's what git uses. sub get_thread_mbox { my ($ctx, $sfx) = @_; - my $srch = searcher($ctx) or return need_search($ctx); + my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview'); require PublicInbox::Mbox; - PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx); + PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx); } # /$INBOX/$MESSAGE_ID/t.atom -> thread as Atom feed sub get_thread_atom { my ($ctx) = @_; - searcher($ctx) or return need_search($ctx); + $ctx->{-inbox}->over or return need($ctx, 'Overview'); require PublicInbox::Feed; PublicInbox::Feed::generate_thread_atom($ctx); } @@ -351,21 +388,25 @@ sub legacy_redirects { } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) { r301($ctx, $1, $2); } else { - $ctx->{www}->news_www->call($ctx->{env}); + news_cgit_fallback($ctx); } } sub r301 { - my ($ctx, $inbox, $mid, $suffix) = @_; - my $obj = $ctx->{-inbox}; - unless ($obj) { + my ($ctx, $inbox, $mid_ue, $suffix) = @_; + my $ibx = $ctx->{-inbox}; + unless ($ibx) { my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; - $obj = $ctx->{-inbox}; + $ibx = $ctx->{-inbox}; } - my $url = $obj->base_url($ctx->{env}); + my $url = $ibx->base_url($ctx->{env}); my $qs = $ctx->{env}->{QUERY_STRING}; - $url .= (mid_escape($mid) . '/') if (defined $mid); + if (defined $mid_ue) { + # common, and much nicer as '@' than '%40': + $mid_ue =~ s/%40/@/g; + $url .= $mid_ue . '/'; + } $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; @@ -375,9 +416,9 @@ sub r301 { } sub msg_page { - my ($ctx, $inbox, $mid, $e) = @_; + my ($ctx, $inbox, $mid_ue, $e) = @_; my $ret; - $ret = invalid_inbox_mid($ctx, $inbox, $mid) and return $ret; + $ret = invalid_inbox_mid($ctx, $inbox, $mid_ue) and return $ret; '' eq $e and return get_mid_html($ctx); 'T/' eq $e and return get_thread($ctx, 1); 't/' eq $e and return get_thread($ctx); @@ -392,14 +433,17 @@ sub msg_page { } sub serve_git { - my ($ctx, $path) = @_; - PublicInbox::GitHTTPBackend::serve($ctx->{env}, $ctx->{git}, $path); + my ($ctx, $part, $path) = @_; + my $env = $ctx->{env}; + my $ibx = $ctx->{-inbox}; + my $git = defined $part ? $ibx->git_part($part) : $ibx->git; + $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404(); } sub mbox_results { my ($ctx) = @_; if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { - searcher($ctx) or return need_search($ctx); + $ctx->{-inbox}->search or return need($ctx, 'search'); require PublicInbox::SearchView; return PublicInbox::SearchView::mbox_results($ctx); } @@ -410,7 +454,6 @@ sub serve_mbox_range { my ($ctx, $inbox, $range) = @_; invalid_inbox($ctx, $inbox) || eval { require PublicInbox::Mbox; - searcher($ctx); PublicInbox::Mbox::emit_range($ctx, $range); } } @@ -423,10 +466,164 @@ sub news_www { } } +sub cgit { + my ($self) = @_; + $self->{cgit} ||= do { + my $pi_config = $self->{pi_config}; + + if (defined($pi_config->{'publicinbox.cgitrc'})) { + require PublicInbox::Cgit; + PublicInbox::Cgit->new($pi_config); + } else { + Plack::Util::inline_object(call => sub { r404() }); + } + } +} + +sub www_listing { + my ($self) = @_; + $self->{www_listing} ||= do { + require PublicInbox::WwwListing; + PublicInbox::WwwListing->new($self); + } +} + sub get_attach { my ($ctx, $idx, $fn) = @_; require PublicInbox::WwwAttach; PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn); } +# User-generated content (UGC) may have excessively long lines +# and screw up rendering on some browsers, so we use pre-wrap. +# +# We also force everything to the same scaled font-size because GUI +# browsers (tested both Firefox and surf (webkit)) uses a larger font +# for the Search