X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWWW.pm;h=268c5b8c7de8e49872cb6333b869b5f3c3170fdb;hb=f3b60bf095846ce9290b94a7b1d700ed7bf0f316;hp=24e24f1ee810554fbefc033f36d5da2bd5365193;hpb=cfb8d16578e7f2f2e300f9f436205e4a8fc7f322;p=public-inbox.git diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 24e24f1e..268c5b8c 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -6,23 +6,30 @@ # We focus on the lowest common denominators here: # - targeted at text-only console browsers (w3m, links, etc..) # - Only basic HTML, CSS only for line-wrapping
text content for GUIs +# and diff/syntax-highlighting (optional) # - No JavaScript, graphics or icons allowed. # - Must not rely on static content # - UTF-8 is only for user-content, 7-bit US-ASCII for us package PublicInbox::WWW; -use 5.008; +use 5.010_001; use strict; use warnings; +use bytes (); # only for bytes::length +use Plack::Util; use PublicInbox::Config; use PublicInbox::Hval; use URI::Escape qw(uri_unescape); use PublicInbox::MID qw(mid_escape); require PublicInbox::Git; use PublicInbox::GitHTTPBackend; -our $INBOX_RE = qr!\A/([\w\.\-]+)!; +use PublicInbox::UserContent; + +# TODO: consider a routing tree now that we have more endpoints: +our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; +our $OID_RE = qr![a-f0-9]{7,40}!; sub new { my ($class, $pi_config) = @_; @@ -36,6 +43,17 @@ sub run { PublicInbox::WWW->new->call($req->env); } +my %path_re_cache; + +sub path_re ($) { + my $sn = $_[0]->{SCRIPT_NAME}; + $path_re_cache{$sn} ||= do { + $sn = '/'.$sn unless index($sn, '/') == 0; + $sn =~ s!/\z!!; + qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!; + }; +} + sub call { my ($self, $env) = @_; my $ctx = { env => $env, www => $self }; @@ -50,7 +68,8 @@ sub call { } split(/[&;]+/, $env->{QUERY_STRING}); $ctx->{qp} = \%qp; - my $path_info = $env->{PATH_INFO}; + # not using $env->{PATH_INFO} here since that's already decoded + my ($path_info) = ($env->{REQUEST_URI} =~ path_re($env)); my $method = $env->{REQUEST_METHOD}; if ($method eq 'POST') { @@ -91,19 +110,26 @@ sub call { invalid_inbox_mid($ctx, $1, $2) || get_attach($ctx, $idx, $fn); # in case people leave off the trailing slash: } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) { - my ($inbox, $mid, $suffix) = ($1, $2, $3); + my ($inbox, $mid_ue, $suffix) = ($1, $2, $3); $suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/'; - r301($ctx, $inbox, $mid, $suffix); + r301($ctx, $inbox, $mid_ue, $suffix); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/R/?\z!o) { - my ($inbox, $mid) = ($1, $2); - r301($ctx, $inbox, $mid, '#R'); + my ($inbox, $mid_ue) = ($1, $2); + r301($ctx, $inbox, $mid_ue, '#R'); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { get_text($ctx, $1, $2); - + } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) { + get_css($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) { + get_vcs_object($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) { + get_vcs_object($ctx, $1, $2, $3); + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) { + r301($ctx, $1, $2, 's/'); # convenience redirects order matters } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) { r301($ctx, $1, $2); @@ -115,6 +141,7 @@ sub call { # for CoW-friendliness, MOOOOO! sub preload { + my ($self) = @_; require PublicInbox::Feed; require PublicInbox::View; require PublicInbox::SearchThread; @@ -127,6 +154,10 @@ sub preload { PublicInbox::NewsWWW)) { eval "require $_;"; } + if (ref($self)) { + $self->cgit; + $self->stylesheets_prepare($_) for ('', '../', '../../'); + } } # private functions below @@ -144,14 +175,20 @@ sub r404 { # simple response for errors sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } +sub news_cgit_fallback ($) { + my ($ctx) = @_; + my $www = $ctx->{www}; + my $env = $ctx->{env}; + my $res = $www->news_www->call($env); + $res->[0] == 404 ? $www->cgit->call($env) : $res; +} + # returns undef if valid, array ref response if invalid sub invalid_inbox ($$) { my ($ctx, $inbox) = @_; - my $www = $ctx->{www}; - my $obj = $www->{pi_config}->lookup_name($inbox); - if (defined $obj) { - $ctx->{git} = $obj->git; - $ctx->{-inbox} = $obj; + my $ibx = $ctx->{www}->{pi_config}->lookup_name($inbox); + if (defined $ibx) { + $ctx->{-inbox} = $ibx; return; } @@ -159,16 +196,16 @@ sub invalid_inbox ($$) { # generation and link things intended for nntp:// to https?://, # so try to infer links and redirect them to the appropriate # list URL. - $www->news_www->call($ctx->{env}); + news_cgit_fallback($ctx); } # returns undef if valid, array ref response if invalid sub invalid_inbox_mid { - my ($ctx, $inbox, $mid) = @_; + my ($ctx, $inbox, $mid_ue) = @_; my $ret = invalid_inbox($ctx, $inbox); return $ret if $ret; - $ctx->{mid} = $mid; + my $mid = $ctx->{mid} = uri_unescape($mid_ue); my $ibx = $ctx->{-inbox}; if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) { my ($x2, $x38) = ($1, $2); @@ -177,7 +214,7 @@ sub invalid_inbox_mid { require Email::Simple; my $s = Email::Simple->new($str); $mid = PublicInbox::MID::mid_clean($s->header('Message-ID')); - return r301($ctx, $inbox, $mid); + return r301($ctx, $inbox, mid_escape($mid)); } undef; } @@ -245,6 +282,18 @@ sub get_text { PublicInbox::WwwText::get_text($ctx, $key); } +# show git objects (blobs and commits) +# /$INBOX/_/$OBJECT_ID/show +# /$INBOX/_/${OBJECT_ID}_${FILENAME} +# KEY may contain slashes +sub get_vcs_object ($$$;$) { + my ($ctx, $inbox, $oid, $filename) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + require PublicInbox::ViewVCS; + PublicInbox::ViewVCS::show($ctx, $oid, $filename); +} + sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; @@ -347,21 +396,25 @@ sub legacy_redirects { } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) { r301($ctx, $1, $2); } else { - $ctx->{www}->news_www->call($ctx->{env}); + news_cgit_fallback($ctx); } } sub r301 { - my ($ctx, $inbox, $mid, $suffix) = @_; - my $obj = $ctx->{-inbox}; - unless ($obj) { + my ($ctx, $inbox, $mid_ue, $suffix) = @_; + my $ibx = $ctx->{-inbox}; + unless ($ibx) { my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; - $obj = $ctx->{-inbox}; + $ibx = $ctx->{-inbox}; } - my $url = $obj->base_url($ctx->{env}); + my $url = $ibx->base_url($ctx->{env}); my $qs = $ctx->{env}->{QUERY_STRING}; - $url .= (mid_escape($mid) . '/') if (defined $mid); + if (defined $mid_ue) { + # common, and much nicer as '@' than '%40': + $mid_ue =~ s/%40/@/g; + $url .= $mid_ue . '/'; + } $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; @@ -371,9 +424,9 @@ sub r301 { } sub msg_page { - my ($ctx, $inbox, $mid, $e) = @_; + my ($ctx, $inbox, $mid_ue, $e) = @_; my $ret; - $ret = invalid_inbox_mid($ctx, $inbox, $mid) and return $ret; + $ret = invalid_inbox_mid($ctx, $inbox, $mid_ue) and return $ret; '' eq $e and return get_mid_html($ctx); 'T/' eq $e and return get_thread($ctx, 1); 't/' eq $e and return get_thread($ctx); @@ -422,10 +475,156 @@ sub news_www { } } +sub cgit { + my ($self) = @_; + $self->{cgit} ||= do { + my $pi_config = $self->{pi_config}; + + if (defined($pi_config->{'publicinbox.cgitrc'})) { + require PublicInbox::Cgit; + PublicInbox::Cgit->new($pi_config); + } else { + Plack::Util::inline_object(call => sub { r404() }); + } + } +} + sub get_attach { my ($ctx, $idx, $fn) = @_; require PublicInbox::WwwAttach; PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn); } +# User-generated content (UGC) may have excessively long lines +# and screw up rendering on some browsers, so we use pre-wrap. +# +# We also force everything to the same scaled font-size because GUI +# browsers (tested both Firefox and surf (webkit)) uses a larger font +# for the Search