X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWWW.pm;h=508abf73856c3daf08614a9342b91670280cad5f;hb=f76f265a851944b5dedcc3be5f3b5224b6ebda89;hp=eeae3d326b0c5dd4a34dece0401f1e67f4ceca1e;hpb=71040e5ff8a68eb0cfaf20c273e227cdceb9dc23;p=public-inbox.git diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index eeae3d32..508abf73 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014, Eric Wong and all contributors +# Copyright (C) 2014-2015 all contributors # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) # # We focus on the lowest common denominators here: @@ -16,14 +16,14 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape); use constant SSOMA_URL => 'http://ssoma.public-inbox.org/'; use constant PI_URL => 'http://public-inbox.org/'; our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; +our $MID_RE = qr!([^/]+)!; +our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $pi_config; -BEGIN { - $pi_config = PublicInbox::Config->new; -} sub run { my ($cgi, $method) = @_; - my %ctx; + $pi_config ||= PublicInbox::Config->new; + my $ctx = { cgi => $cgi, pi_config => $pi_config }; if ($method !~ /\AGET|HEAD\z/) { return r(405, 'Method Not Allowed'); } @@ -33,43 +33,25 @@ sub run { if ($path_info eq '/') { r404(); } elsif ($path_info =~ m!$LISTNAME_RE\z!o) { - invalid_list(\%ctx, $1) || redirect_list_index(\%ctx, $cgi); + invalid_list($ctx, $1) || r301($ctx, $1); } elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) { - invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi); - } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) { - invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi); - - # single-message pages - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx, $cgi); - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx, $cgi); - - # full-message page - } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx, $cgi); - - # thread display - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx, $cgi); + invalid_list($ctx, $1) || get_index($ctx); + } elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) { + invalid_list($ctx, $1) || get_atom($ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox(\.gz)?\z!o) { - my $sfx = $3; - invalid_list_mid(\%ctx, $1, $2) || - get_thread_mbox(\%ctx, $cgi, $sfx); + } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) { + msg_page($ctx, $1, $2, $3); - } elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) { - invalid_list_mid(\%ctx, $1, $2) || - redirect_mid_txt(\%ctx, $cgi); + # in case people leave off the trailing slash: + } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/(f|T|t)\z!o) { + r301($ctx, $1, $2, $3 eq 't' ? 't/#u' : $3); - # convenience redirects, order matters - } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) { - my $pfx = $2; - invalid_list_mid(\%ctx, $1, $3) || - redirect_mid(\%ctx, $cgi, $2); + # convenience redirects order matters + } elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) { + r301($ctx, $1, $2); } else { - r404(); + legacy_redirects($ctx, $path_info); } } @@ -85,6 +67,7 @@ sub preload { eval { require PublicInbox::Search; + require PublicInbox::SearchView; require PublicInbox::Mbox; require IO::Compress::Gzip; }; @@ -92,7 +75,15 @@ sub preload { # private functions below -sub r404 { r(404, 'Not Found') } +sub r404 { + my ($ctx) = @_; + if ($ctx && $ctx->{mid}) { + require PublicInbox::ExtMsg; + searcher($ctx); + return PublicInbox::ExtMsg::ext_msg($ctx); + } + r(404, 'Not Found'); +} # simple response for errors sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] } @@ -113,28 +104,40 @@ sub invalid_list { sub invalid_list_mid { my ($ctx, $listname, $mid) = @_; my $ret = invalid_list($ctx, $listname, $mid); - $ctx->{mid} = uri_unescape($mid) unless $ret; - $ret; + return $ret if $ret; + + $ctx->{mid} = $mid = uri_unescape($mid); + if ($mid =~ /\A[a-f0-9]{40}\z/) { + if ($mid = mid2blob($ctx)) { + require Email::Simple; + use PublicInbox::MID qw/mid_clean/; + $mid = Email::Simple->new($mid); + $ctx->{mid} = mid_clean($mid->header('Message-ID')); + } + } + undef; } -# /$LISTNAME/atom.xml -> Atom feed, includes replies +# /$LISTNAME/new.atom -> Atom feed, includes replies sub get_atom { - my ($ctx, $cgi) = @_; - $ctx->{pi_config} = $pi_config; - $ctx->{cgi} = $cgi; + my ($ctx) = @_; require PublicInbox::Feed; PublicInbox::Feed::generate($ctx); } # /$LISTNAME/?r=$GIT_COMMIT -> HTML only sub get_index { - my ($ctx, $cgi) = @_; + my ($ctx) = @_; require PublicInbox::Feed; my $srch = searcher($ctx); - $ctx->{pi_config} = $pi_config; - $ctx->{cgi} = $cgi; + my $q = $ctx->{cgi}->param('q'); footer($ctx); - PublicInbox::Feed::generate_html_index($ctx); + if (defined $q) { + require PublicInbox::SearchView; + PublicInbox::SearchView::sres_top_html($ctx, $q); + } else { + PublicInbox::Feed::generate_html_index($ctx); + } } # just returns a string ref for the blob in the current ctx @@ -144,7 +147,6 @@ sub mid2blob { my $path = PublicInbox::MID::mid2path($ctx->{mid}); my @cmd = ('git', "--git-dir=$ctx->{git_dir}", qw(cat-file blob), "HEAD:$path"); - my $cmd = join(' ', @cmd); my $pid = open my $fh, '-|'; defined $pid or die "fork failed: $!\n"; if ($pid == 0) { @@ -157,35 +159,33 @@ sub mid2blob { } } -# /$LISTNAME/m/$MESSAGE_ID.txt -> raw mbox +# /$LISTNAME/$MESSAGE_ID/raw -> raw mbox sub get_mid_txt { - my ($ctx, $cgi) = @_; - my $x = mid2blob($ctx) or return r404(); + my ($ctx) = @_; + my $x = mid2blob($ctx) or return r404($ctx); require PublicInbox::Mbox; PublicInbox::Mbox::emit1($x); } -# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes) +# /$LISTNAME/$MESSAGE_ID/ -> HTML content (short quotes) sub get_mid_html { - my ($ctx, $cgi) = @_; - my $x = mid2blob($ctx); - return r404() unless $x; + my ($ctx) = @_; + my $x = mid2blob($ctx) or return r404($ctx); require PublicInbox::View; - my $pfx = msg_pfx($ctx); my $foot = footer($ctx); require Email::MIME; my $mime = Email::MIME->new($x); searcher($ctx); [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ], - [ PublicInbox::View::msg_html($ctx, $mime, $pfx, $foot) ] ]; + [ PublicInbox::View::msg_html($ctx, $mime, 'f/', $foot) ] ]; } -# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes) +# /$LISTNAME/$MESSAGE_ID/f/ -> HTML content (fullquotes) sub get_full_html { - my ($ctx, $cgi) = @_; - my $x = mid2blob($ctx); - return r404() unless $x; + my ($ctx) = @_; + my $x = mid2blob($ctx) or return r404($ctx); + require PublicInbox::View; my $foot = footer($ctx); require Email::MIME; @@ -195,12 +195,13 @@ sub get_full_html { [ PublicInbox::View::msg_html($ctx, $mime, undef, $foot)] ]; } -# /$LISTNAME/t/$MESSAGE_ID.html +# /$LISTNAME/$MESSAGE_ID/t/ sub get_thread { - my ($ctx, $cgi) = @_; + my ($ctx, $flat) = @_; my $srch = searcher($ctx) or return need_search($ctx); require PublicInbox::View; my $foot = footer($ctx); + $ctx->{flat} = $flat; PublicInbox::View::thread_html($ctx, $foot, $srch); } @@ -209,42 +210,10 @@ sub self_url { ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string; } -sub redirect_list_index { - my ($ctx, $cgi) = @_; - do_redirect(self_url($cgi) . "/"); -} - -sub redirect_mid { - my ($ctx, $cgi, $pfx) = @_; - my $url = self_url($cgi); - my $anchor = ''; - if (lc($pfx) eq 't') { - $anchor = '#u'; # is used to highlight in View.pm - } - do_redirect($url . ".html$anchor"); -} - -# only hit when somebody tries to guess URLs manually: -sub redirect_mid_txt { - my ($ctx, $cgi, $pfx) = @_; - my $listname = $ctx->{listname}; - my $url = self_url($cgi); - $url =~ s!/$listname/f/(\S+\.txt)\z!/$listname/m/$1!; - do_redirect($url); -} - -sub do_redirect { - my ($url) = @_; - [ 301, - [ Location => $url, 'Content-Type' => 'text/plain' ], - [ "Redirecting to $url\n" ] - ] -} - sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; - (defined $val && length $val) or die "BUG: bad ctx, $key unusable\n"; + (defined $val && $val ne '') or die "BUG: bad ctx, $key unusable\n"; $val; } @@ -327,22 +296,116 @@ EOF [ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ]; } -sub msg_pfx { - my ($ctx) = @_; - my $href = PublicInbox::Hval::ascii_html(uri_escape_utf8($ctx->{mid})); - "../f/$href.html"; -} - -# /$LISTNAME/t/$MESSAGE_ID.mbox -> thread as mbox -# /$LISTNAME/t/$MESSAGE_ID.mbox.gz -> thread as gzipped mbox +# /$LISTNAME/$MESSAGE_ID/t.mbox -> thread as mbox +# /$LISTNAME/$MESSAGE_ID/t.mbox.gz -> thread as gzipped mbox # note: I'm not a big fan of other compression formats since they're # significantly more expensive on CPU than gzip and less-widely available, # especially on older systems. Stick to zlib since that's what git uses. sub get_thread_mbox { - my ($ctx, $cgi, $sfx) = @_; + my ($ctx, $sfx) = @_; my $srch = searcher($ctx) or return need_search($ctx); require PublicInbox::Mbox; PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx); } + +# /$LISTNAME/$MESSAGE_ID/t.atom -> thread as Atom feed +sub get_thread_atom { + my ($ctx) = @_; + searcher($ctx) or return need_search($ctx); + $ctx->{self_url} = self_url($ctx->{cgi}); + require PublicInbox::Feed; + PublicInbox::Feed::generate_thread_atom($ctx); +} + +sub legacy_redirects { + my ($ctx, $path_info) = @_; + + # single-message pages + if ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) { + r301($ctx, $1, $2); + } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) { + r301($ctx, $1, $2, 'raw'); + + } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) { + r301($ctx, $1, $2, 'f/'); + + # thread display + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) { + r301($ctx, $1, $2, 't/#u'); + + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) { + r301($ctx, $1, $2, "t.mbox$3"); + + # even older legacy redirects + } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) { + r301($ctx, $1, $2); + + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) { + r301($ctx, $1, $2, 't/#u'); + + } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) { + r301($ctx, $1, $2, 'f/'); + + } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\.txt\z!o) { + r301($ctx, $1, $2, 'raw'); + + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) { + r301($ctx, $1, $2, "t$3"); + + # legacy convenience redirects, order still matters + } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\z!o) { + r301($ctx, $1, $2); + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\z!o) { + r301($ctx, $1, $2, 't/#u'); + } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\z!o) { + r301($ctx, $1, $2, 'f/'); + + # some Message-IDs have slashes in them and the HTTP server + # may try to be clever and unescape them :< + } elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) { + msg_page($ctx, $1, $2, $3); + + # in case people leave off the trailing slash: + } elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/(f|T|t)\z!o) { + r301($ctx, $1, $2, $3 eq 't' ? 't/#u' : $3); + } else { + r404(); + } +} + +sub r301 { + my ($ctx, $listname, $mid, $suffix) = @_; + my $cgi = $ctx->{cgi}; + my $url; + if (ref($cgi) eq 'CGI') { + $url = $cgi->url(-base) . '/'; + } else { + $url = $cgi->base->as_string; + } + + $url .= $listname . '/'; + $url .= (uri_escape_utf8($mid) . '/') if (defined $mid); + $url .= $suffix if (defined $suffix); + + [ 301, + [ Location => $url, 'Content-Type' => 'text/plain' ], + [ "Redirecting to $url\n" ] ] +} + +sub msg_page { + my ($ctx, $list, $mid, $e) = @_; + unless (invalid_list_mid($ctx, $list, $mid)) { + '' eq $e and return get_mid_html($ctx); + 't/' eq $e and return get_thread($ctx); + 't.atom' eq $e and return get_thread_atom($ctx); + 't.mbox' eq $e and return get_thread_mbox($ctx); + 't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz'); + 'T/' eq $e and return get_thread($ctx, 1); + 'raw' eq $e and return get_mid_txt($ctx); + 'f/' eq $e and return get_full_html($ctx); + } + r404($ctx); +} + 1;