X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWWW.pm;h=508abf73856c3daf08614a9342b91670280cad5f;hb=f76f265a851944b5dedcc3be5f3b5224b6ebda89;hp=a01271b1c9291460a86f8011470e83f4ad770ac9;hpb=28ee19c32a1ecf8e22f30e8f9de860695f4fb30c;p=public-inbox.git diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index a01271b1..508abf73 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014, Eric Wong and all contributors +# Copyright (C) 2014-2015 all contributors # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) # # We focus on the lowest common denominators here: @@ -16,14 +16,14 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape); use constant SSOMA_URL => 'http://ssoma.public-inbox.org/'; use constant PI_URL => 'http://public-inbox.org/'; our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; +our $MID_RE = qr!([^/]+)!; +our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $pi_config; -BEGIN { - $pi_config = PublicInbox::Config->new; -} sub run { my ($cgi, $method) = @_; - my %ctx = (cgi => $cgi, pi_config => $pi_config); + $pi_config ||= PublicInbox::Config->new; + my $ctx = { cgi => $cgi, pi_config => $pi_config }; if ($method !~ /\AGET|HEAD\z/) { return r(405, 'Method Not Allowed'); } @@ -33,53 +33,25 @@ sub run { if ($path_info eq '/') { r404(); } elsif ($path_info =~ m!$LISTNAME_RE\z!o) { - invalid_list(\%ctx, $1) || redirect_list_index($cgi); + invalid_list($ctx, $1) || r301($ctx, $1); } elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) { - invalid_list(\%ctx, $1) || get_index(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) { - invalid_list(\%ctx, $1) || get_atom(\%ctx); + invalid_list($ctx, $1) || get_index($ctx); + } elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) { + invalid_list($ctx, $1) || get_atom($ctx); - # single-message pages - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx); + } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) { + msg_page($ctx, $1, $2, $3); - # full-message page - } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx); + # in case people leave off the trailing slash: + } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/(f|T|t)\z!o) { + r301($ctx, $1, $2, $3 eq 't' ? 't/#u' : $3); - # thread display - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx); - - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!x) { - my $sfx = $3; - invalid_list_mid(\%ctx, $1, $2) || - get_thread_mbox(\%ctx, $sfx); - - # legacy redirects - } elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) { - my $pfx = $2; - invalid_list_mid(\%ctx, $1, $3) || - redirect_mid(\%ctx, $pfx, qr/\.html\z/, '/'); - } elsif ($path_info =~ m!$LISTNAME_RE/(m|f)/(\S+)\.txt\z!o) { - my $pfx = $2; - invalid_list_mid(\%ctx, $1, $3) || - redirect_mid(\%ctx, $pfx, qr/\.txt\z/, '/raw'); - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) { - my $end = $3; - invalid_list_mid(\%ctx, $1, $2) || - redirect_mid(\%ctx, 't', $end, '/mbox.gz'); - - # convenience redirects, order matters - } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) { - my $pfx = $2; - invalid_list_mid(\%ctx, $1, $3) || - redirect_mid(\%ctx, $pfx, qr/\z/, '/'); + # convenience redirects order matters + } elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) { + r301($ctx, $1, $2); } else { - r404(); + legacy_redirects($ctx, $path_info); } } @@ -95,6 +67,7 @@ sub preload { eval { require PublicInbox::Search; + require PublicInbox::SearchView; require PublicInbox::Mbox; require IO::Compress::Gzip; }; @@ -102,7 +75,15 @@ sub preload { # private functions below -sub r404 { r(404, 'Not Found') } +sub r404 { + my ($ctx) = @_; + if ($ctx && $ctx->{mid}) { + require PublicInbox::ExtMsg; + searcher($ctx); + return PublicInbox::ExtMsg::ext_msg($ctx); + } + r(404, 'Not Found'); +} # simple response for errors sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } @@ -123,11 +104,21 @@ sub invalid_list { sub invalid_list_mid { my ($ctx, $listname, $mid) = @_; my $ret = invalid_list($ctx, $listname, $mid); - $ctx->{mid} = uri_unescape($mid) unless $ret; - $ret; + return $ret if $ret; + + $ctx->{mid} = $mid = uri_unescape($mid); + if ($mid =~ /\A[a-f0-9]{40}\z/) { + if ($mid = mid2blob($ctx)) { + require Email::Simple; + use PublicInbox::MID qw/mid_clean/; + $mid = Email::Simple->new($mid); + $ctx->{mid} = mid_clean($mid->header('Message-ID')); + } + } + undef; } -# /$LISTNAME/atom.xml -> Atom feed, includes replies +# /$LISTNAME/new.atom -> Atom feed, includes replies sub get_atom { my ($ctx) = @_; require PublicInbox::Feed; @@ -139,8 +130,14 @@ sub get_index { my ($ctx) = @_; require PublicInbox::Feed; my $srch = searcher($ctx); + my $q = $ctx->{cgi}->param('q'); footer($ctx); - PublicInbox::Feed::generate_html_index($ctx); + if (defined $q) { + require PublicInbox::SearchView; + PublicInbox::SearchView::sres_top_html($ctx, $q); + } else { + PublicInbox::Feed::generate_html_index($ctx); + } } # just returns a string ref for the blob in the current ctx @@ -162,33 +159,32 @@ sub mid2blob { } } -# /$LISTNAME/m/$MESSAGE_ID.txt -> raw mbox +# /$LISTNAME/$MESSAGE_ID/raw -> raw mbox sub get_mid_txt { my ($ctx) = @_; - my $x = mid2blob($ctx) or return r404(); + my $x = mid2blob($ctx) or return r404($ctx); require PublicInbox::Mbox; PublicInbox::Mbox::emit1($x); } -# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes) +# /$LISTNAME/$MESSAGE_ID/ -> HTML content (short quotes) sub get_mid_html { my ($ctx) = @_; - my $x = mid2blob($ctx) or return r404(); + my $x = mid2blob($ctx) or return r404($ctx); require PublicInbox::View; - my $pfx = msg_pfx($ctx); my $foot = footer($ctx); require Email::MIME; my $mime = Email::MIME->new($x); searcher($ctx); [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], - [ PublicInbox::View::msg_html($ctx, $mime, $pfx, $foot) ] ]; + [ PublicInbox::View::msg_html($ctx, $mime, 'f/', $foot) ] ]; } -# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes) +# /$LISTNAME/$MESSAGE_ID/f/ -> HTML content (fullquotes) sub get_full_html { my ($ctx) = @_; - my $x = mid2blob($ctx) or return r404(); + my $x = mid2blob($ctx) or return r404($ctx); require PublicInbox::View; my $foot = footer($ctx); @@ -199,12 +195,13 @@ sub get_full_html { [ PublicInbox::View::msg_html($ctx, $mime, undef, $foot)] ]; } -# /$LISTNAME/t/$MESSAGE_ID.html +# /$LISTNAME/$MESSAGE_ID/t/ sub get_thread { - my ($ctx) = @_; + my ($ctx, $flat) = @_; my $srch = searcher($ctx) or return need_search($ctx); require PublicInbox::View; my $foot = footer($ctx); + $ctx->{flat} = $flat; PublicInbox::View::thread_html($ctx, $foot, $srch); } @@ -213,39 +210,6 @@ sub self_url { ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string; } -sub redirect_list_index { - my ($cgi) = @_; - do_redirect(self_url($cgi) . "/"); -} - -sub redirect_mid { - my ($ctx, $pfx, $old, $sfx) = @_; - my $url = self_url($ctx->{cgi}); - my $anchor = ''; - if (lc($pfx) eq 't' && $sfx eq '/') { - $anchor = '#u'; # is used to highlight in View.pm - } - $url =~ s/$old/$sfx/; - do_redirect($url . $anchor); -} - -# only hit when somebody tries to guess URLs manually: -sub redirect_mid_txt { - my ($ctx, $pfx) = @_; - my $listname = $ctx->{listname}; - my $url = self_url($ctx->{cgi}); - $url =~ s!/$listname/f/(\S+\.txt)\z!/$listname/m/$1!; - do_redirect($url); -} - -sub do_redirect { - my ($url) = @_; - [ 301, - [ Location => $url, 'Content-Type' => 'text/plain' ], - [ "Redirecting to $url\n" ] - ] -} - sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; @@ -332,14 +296,8 @@ EOF [ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ]; } -sub msg_pfx { - my ($ctx) = @_; - my $href = PublicInbox::Hval::ascii_html(uri_escape_utf8($ctx->{mid})); - "../../f/$href/"; -} - -# /$LISTNAME/t/$MESSAGE_ID/mbox -> thread as mbox -# /$LISTNAME/t/$MESSAGE_ID/mbox.gz -> thread as gzipped mbox +# /$LISTNAME/$MESSAGE_ID/t.mbox -> thread as mbox +# /$LISTNAME/$MESSAGE_ID/t.mbox.gz -> thread as gzipped mbox # note: I'm not a big fan of other compression formats since they're # significantly more expensive on CPU than gzip and less-widely available, # especially on older systems. Stick to zlib since that's what git uses. @@ -350,4 +308,104 @@ sub get_thread_mbox { PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx); } + +# /$LISTNAME/$MESSAGE_ID/t.atom -> thread as Atom feed +sub get_thread_atom { + my ($ctx) = @_; + searcher($ctx) or return need_search($ctx); + $ctx->{self_url} = self_url($ctx->{cgi}); + require PublicInbox::Feed; + PublicInbox::Feed::generate_thread_atom($ctx); +} + +sub legacy_redirects { + my ($ctx, $path_info) = @_; + + # single-message pages + if ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) { + r301($ctx, $1, $2); + } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) { + r301($ctx, $1, $2, 'raw'); + + } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) { + r301($ctx, $1, $2, 'f/'); + + # thread display + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) { + r301($ctx, $1, $2, 't/#u'); + + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) { + r301($ctx, $1, $2, "t.mbox$3"); + + # even older legacy redirects + } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) { + r301($ctx, $1, $2); + + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) { + r301($ctx, $1, $2, 't/#u'); + + } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) { + r301($ctx, $1, $2, 'f/'); + + } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\.txt\z!o) { + r301($ctx, $1, $2, 'raw'); + + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) { + r301($ctx, $1, $2, "t$3"); + + # legacy convenience redirects, order still matters + } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\z!o) { + r301($ctx, $1, $2); + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\z!o) { + r301($ctx, $1, $2, 't/#u'); + } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\z!o) { + r301($ctx, $1, $2, 'f/'); + + # some Message-IDs have slashes in them and the HTTP server + # may try to be clever and unescape them :< + } elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) { + msg_page($ctx, $1, $2, $3); + + # in case people leave off the trailing slash: + } elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/(f|T|t)\z!o) { + r301($ctx, $1, $2, $3 eq 't' ? 't/#u' : $3); + } else { + r404(); + } +} + +sub r301 { + my ($ctx, $listname, $mid, $suffix) = @_; + my $cgi = $ctx->{cgi}; + my $url; + if (ref($cgi) eq 'CGI') { + $url = $cgi->url(-base) . '/'; + } else { + $url = $cgi->base->as_string; + } + + $url .= $listname . '/'; + $url .= (uri_escape_utf8($mid) . '/') if (defined $mid); + $url .= $suffix if (defined $suffix); + + [ 301, + [ Location => $url, 'Content-Type' => 'text/plain' ], + [ "Redirecting to $url\n" ] ] +} + +sub msg_page { + my ($ctx, $list, $mid, $e) = @_; + unless (invalid_list_mid($ctx, $list, $mid)) { + '' eq $e and return get_mid_html($ctx); + 't/' eq $e and return get_thread($ctx); + 't.atom' eq $e and return get_thread_atom($ctx); + 't.mbox' eq $e and return get_thread_mbox($ctx); + 't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz'); + 'T/' eq $e and return get_thread($ctx, 1); + 'raw' eq $e and return get_mid_txt($ctx); + 'f/' eq $e and return get_full_html($ctx); + } + r404($ctx); +} + 1;