-# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
+# Copyright (C) 2014-2015 all contributors <meta@public-inbox.org>
# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
#
+# Main web interface for mailing list archives
+#
# We focus on the lowest common denominators here:
# - targeted at text-only console browsers (w3m, links, etc..)
# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs
use 5.008;
use strict;
use warnings;
-use PublicInbox::Config;
+use PublicInbox::Config qw(try_cat);
use URI::Escape qw(uri_escape_utf8 uri_unescape);
use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
use constant PI_URL => 'http://public-inbox.org/';
+require PublicInbox::Git;
+use PublicInbox::GitHTTPBackend;
our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
-our $pi_config;
-BEGIN {
- $pi_config = PublicInbox::Config->new;
+our $MID_RE = qr!([^/]+)!;
+our $END_RE = qr!(f/|T/|t/|R/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+
+# Constructor.  Stores the given PublicInbox::Config object, or a freshly
+# created one when $pi_config is missing (or false), under {pi_config}.
+sub new {
+	my $class = shift;
+	my $config = shift || PublicInbox::Config->new;
+	my $self = { pi_config => $config };
+	return bless $self, $class;
}
+# backwards compatibility, do not use
sub run {
- my ($cgi, $method) = @_;
- my %ctx;
- if ($method !~ /\AGET|HEAD\z/) {
+	# $method is still accepted so old callers keep working, but it is
+	# not used here: call() reads the method from the request itself.
+	my ($req, $method) = @_;
+	my $app = PublicInbox::WWW->new;
+	return $app->call($req->env);
+}
+
+# Request entry point.  Wraps $env in a Plack::Request, builds the per-request
+# context ($ctx) and dispatches on the request method and PATH_INFO.  Returns
+# whatever the selected handler returns.
+#
+# POST is only accepted for /$LISTNAME/git-upload-pack; any other method
+# besides GET and HEAD gets a 405 response.
+sub call {
+ my ($self, $env) = @_;
+ my $cgi = Plack::Request->new($env);
+ my $ctx = { cgi => $cgi, pi_config => $self->{pi_config} };
+ my $path_info = $cgi->path_info;
+
+ my $method = $cgi->method;
+ if ($method eq 'POST' &&
+ $path_info =~ m!$LISTNAME_RE/(git-upload-pack)\z!) {
+ my $path = $2;
+ return (invalid_list($ctx, $1) ||
+ serve_git($cgi, $ctx->{git}, $path));
+ }
+ elsif ($method !~ /\A(?:GET|HEAD)\z/) {
+ # the alternation is grouped so that both anchors apply to both
+ # method names; ungrouped, "GETX" or "XHEAD" would be let through
return r(405, 'Method Not Allowed');
}
- my $path_info = $cgi->path_info;
# top-level indices and feeds
if ($path_info eq '/') {
r404();
} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
- invalid_list(\%ctx, $1) || redirect_list_index(\%ctx, $cgi);
+ invalid_list($ctx, $1) || r301($ctx, $1);
} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
- invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi);
- } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) {
- invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi);
-
- # single-message pages
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx, $cgi);
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx, $cgi);
-
- # full-message page
- } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx, $cgi);
-
- # thread display
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx, $cgi);
-
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox\.gz!o) {
- my $sfx = $3;
- invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $cgi);
-
- } elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) {
- invalid_list_mid(\%ctx, $1, $2) ||
- redirect_mid_txt(\%ctx, $cgi);
-
- # convenience redirects, order matters
- } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) {
- my $pfx = $2;
- invalid_list_mid(\%ctx, $1, $3) ||
- redirect_mid(\%ctx, $cgi, $2);
+ invalid_list($ctx, $1) || get_index($ctx);
+ } elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
+ invalid_list($ctx, $1) || get_atom($ctx);
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/
+ ($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
+ my $path = $2;
+ invalid_list($ctx, $1) || serve_git($cgi, $ctx->{git}, $path);
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
+ msg_page($ctx, $1, $2, $3);
+
+ # in case people leave off the trailing slash:
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/(f|T|t|R)\z!o) {
+ my ($listname, $mid, $suffix) = ($1, $2, $3);
+ $suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/';
+ r301($ctx, $listname, $mid, $suffix);
+
+ # convenience redirects, order matters
+ } elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) {
+ r301($ctx, $1, $2);
} else {
- r404();
+ legacy_redirects($ctx, $path_info);
}
}
require Email::MIME;
require Digest::SHA;
require POSIX;
- require XML::Atom::SimpleFeed;
+
+ eval {
+ require PublicInbox::Search;
+ require PublicInbox::SearchView;
+ require PublicInbox::Mbox;
+ require IO::Compress::Gzip;
+ };
}
# private functions below
-sub r404 { r(404, 'Not Found') }
+# Not-found response.  When $ctx carries a (true) {mid}, the reply is
+# delegated to PublicInbox::ExtMsg::ext_msg; otherwise it is a plain 404.
+sub r404 {
+	my $ctx = shift;
+	return r(404, 'Not Found') unless $ctx && $ctx->{mid};
+
+	require PublicInbox::ExtMsg;
+	# called before ext_msg for its effect on $ctx -- presumably ext_msg
+	# relies on it; TODO confirm against searcher()
+	searcher($ctx);
+	PublicInbox::ExtMsg::ext_msg($ctx);
+}
# simple response for errors
+# Returns a three-element array ref: the first argument as status, a
+# text/plain Content-Type header list, and a one-element body made of all
+# arguments joined with spaces, with "\n" as the last joined element.
sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
# returns undef if valid, array ref response if invalid
sub invalid_list {
my ($ctx, $listname) = @_;
- my $git_dir = $pi_config->get($listname, "mainrepo");
+ my $git_dir = $ctx->{pi_config}->get($listname, "mainrepo");
if (defined $git_dir) {
$ctx->{git_dir} = $git_dir;
+ $ctx->{git} = PublicInbox::Git->new($git_dir);
$ctx->{listname} = $listname;
return;
}
sub invalid_list_mid {
my ($ctx, $listname, $mid) = @_;
my $ret = invalid_list($ctx, $listname, $mid);
- $ctx->{mid} = uri_unescape($mid) unless $ret;
- $ret;
+	return $ret if $ret;
+
+	$ctx->{mid} = $mid = uri_unescape($mid);
+
+	# A 40-hex-digit identifier that resolves to a stored blob is replaced
+	# by the (cleaned) Message-ID header found in that blob.
+	if ($mid =~ /\A[a-f0-9]{40}\z/) {
+		if (my $blob = mid2blob($ctx)) {
+			require Email::Simple;
+			# NOTE: "use" takes effect at compile time, regardless
+			# of the conditionals enclosing it
+			use PublicInbox::MID qw/mid_clean/;
+			my $simple = Email::Simple->new($blob);
+			$ctx->{mid} = mid_clean($simple->header('Message-ID'));
+		}
+	}
+	undef;
}
-# /$LISTNAME/atom.xml -> Atom feed, includes replies
+# /$LISTNAME/new.atom -> Atom feed, includes replies
sub get_atom {
- my ($ctx, $cgi) = @_;
+	my $ctx = shift;
require PublicInbox::Feed;
- $ctx->{pi_config} = $pi_config;
- $ctx->{cgi} = $cgi;
- [ 200, [ 'Content-Type' => 'application/xml' ],
- [ PublicInbox::Feed->generate($ctx) ] ]
+	# the complete response now comes from PublicInbox::Feed::generate
+	return PublicInbox::Feed::generate($ctx);
}
# /$LISTNAME/?r=$GIT_COMMIT -> HTML only
sub get_index {
- my ($ctx, $cgi) = @_;
+	my $ctx = shift;
require PublicInbox::Feed;
my $srch = searcher($ctx);
- $ctx->{pi_config} = $pi_config;
- $ctx->{cgi} = $cgi;
footer($ctx);
- [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::Feed->generate_html_index($ctx) ] ]
+	# without a "q" query parameter this is the plain HTML index;
+	# with one, the request is answered by the search view instead
+	defined $ctx->{cgi}->param('q') or
+		return PublicInbox::Feed::generate_html_index($ctx);
+
+	require PublicInbox::SearchView;
+	PublicInbox::SearchView::sres_top_html($ctx);
}
# just returns a string ref for the blob in the current ctx
my ($ctx) = @_;
require PublicInbox::MID;
my $path = PublicInbox::MID::mid2path($ctx->{mid});
- my @cmd = ('git', "--git-dir=$ctx->{git_dir}",
- qw(cat-file blob), "HEAD:$path");
- my $cmd = join(' ', @cmd);
- my $pid = open my $fh, '-|';
- defined $pid or die "fork failed: $!\n";
- if ($pid == 0) {
- open STDERR, '>', '/dev/null'; # ignore errors
- exec @cmd or die "exec failed: $!\n";
- } else {
- my $blob = eval { local $/; <$fh> };
- close $fh;
- $? == 0 ? \$blob : undef;
- }
+ $ctx->{git}->cat_file("HEAD:$path");
}
-# /$LISTNAME/m/$MESSAGE_ID.txt -> raw original
+# /$LISTNAME/$MESSAGE_ID/raw -> raw mbox
sub get_mid_txt {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- $x ? [ 200, [ 'Content-Type' => 'text/plain' ], [ $$x ] ] : r404();
+	my $ctx = shift;
+	my $blob = mid2blob($ctx);
+	return r404($ctx) unless $blob;
+
+	# the response itself is produced by PublicInbox::Mbox::emit1
+	require PublicInbox::Mbox;
+	PublicInbox::Mbox::emit1($ctx, $blob);
}
-# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes)
+# /$LISTNAME/$MESSAGE_ID/ -> HTML content (short quotes)
sub get_mid_html {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- return r404() unless $x;
+	my $ctx = shift;
+	my $x = mid2blob($ctx);
+	return r404($ctx) unless $x;
require PublicInbox::View;
- my $pfx = msg_pfx($ctx);
my $foot = footer($ctx);
require Email::MIME;
my $mime = Email::MIME->new($x);
- my $srch = searcher($ctx);
+	# return value unused; called for its effect on $ctx
+	searcher($ctx);
[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::View->msg_html($mime, $pfx, $foot, $srch) ] ];
+	# 'f/' is passed in the slot get_full_html leaves undef
+	[ PublicInbox::View::msg_html($ctx, $mime, 'f/', $foot) ] ];
}
-# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes)
+# /$LISTNAME/$MESSAGE_ID/f/ -> HTML content (fullquotes)
sub get_full_html {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- return r404() unless $x;
+	my $ctx = shift;
+	my $x = mid2blob($ctx);
+	return r404($ctx) unless $x;
+
require PublicInbox::View;
my $foot = footer($ctx);
require Email::MIME;
my $mime = Email::MIME->new($x);
- my $srch = searcher($ctx);
+	# return value unused; called for its effect on $ctx
+	searcher($ctx);
[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::View->msg_html($mime, undef, $foot, $srch)] ];
+	[ PublicInbox::View::msg_html($ctx, $mime, undef, $foot)] ];
}
-# /$LISTNAME/t/$MESSAGE_ID.html
-sub get_thread {
- my ($ctx, $cgi) = @_;
- my $srch = searcher($ctx) or return need_search($ctx);
+# /$LISTNAME/$MESSAGE_ID/R/ -> HTML content (reply page, see View::msg_reply)
+sub get_reply_html {
+	my $ctx = shift;
+	my $x = mid2blob($ctx);
+	return r404($ctx) unless $x;
+
require PublicInbox::View;
my $foot = footer($ctx);
- my $body = PublicInbox::View->thread_html($ctx, $foot, $srch) or
- return r404();
+	require Email::MIME;
+	# only the header object of the message is handed to the view
+	my $mime = Email::MIME->new($x);
+	my $hdr = $mime->header_obj;
[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ $body ] ];
-}
-
-sub self_url {
- my ($cgi) = @_;
- ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string;
-}
-
-sub redirect_list_index {
- my ($ctx, $cgi) = @_;
- do_redirect(self_url($cgi) . "/");
-}
-
-sub redirect_mid {
- my ($ctx, $cgi, $pfx) = @_;
- my $url = self_url($cgi);
- my $anchor = '';
- if (lc($pfx) eq 't') {
- $anchor = '#u'; # <u id='#u'> is used to highlight in View.pm
- }
- do_redirect($url . ".html$anchor");
+	[ PublicInbox::View::msg_reply($ctx, $hdr, $foot)] ];
}
-# only hit when somebody tries to guess URLs manually:
-sub redirect_mid_txt {
- my ($ctx, $cgi, $pfx) = @_;
- my $listname = $ctx->{listname};
- my $url = self_url($cgi);
- $url =~ s!/$listname/f/(\S+\.txt)\z!/$listname/m/$1!;
- do_redirect($url);
-}
-
-sub do_redirect {
- my ($url) = @_;
- [ 301,
- [ Location => $url, 'Content-Type' => 'text/plain' ],
- [ "Redirecting to $url\n" ]
- ]
+# /$LISTNAME/$MESSAGE_ID/t/
+sub get_thread {
+	# $flat is stored in $ctx->{flat}; msg_page passes 1 for the "T/"
+	# endpoint and nothing for "t/"
+	my ($ctx, $flat) = @_;
+	my $srch = searcher($ctx);
+	return need_search($ctx) unless $srch;
+
+	require PublicInbox::View;
+	my $foot = footer($ctx);
+	$ctx->{flat} = $flat;
+	return PublicInbox::View::thread_html($ctx, $foot, $srch);
}
sub ctx_get {
my ($ctx, $key) = @_;
my $val = $ctx->{$key};
- (defined $val && length $val) or die "BUG: bad ctx, $key unusable\n";
+	# undef and the empty string both count as unusable
+	die "BUG: bad ctx, $key unusable\n" unless defined $val && $val ne '';
$val;
}
-sub try_cat {
- my ($path) = @_;
- my $rv;
- if (open(my $fh, '<', $path)) {
- local $/;
- $rv = <$fh>;
- close $fh;
- }
- $rv;
-}
-
sub footer {
my ($ctx) = @_;
return '' unless $ctx;
my $urls = try_cat("$git_dir/cloneurl");
my @urls = split(/\r?\n/, $urls || '');
- my $nurls = scalar @urls;
- if ($nurls == 0) {
- $urls = '($GIT_DIR/cloneurl missing)';
- } elsif ($nurls == 1) {
- $urls = "git URL for <a\nhref=\"" . SSOMA_URL .
- '">ssoma</a>: ' . $urls[0];
+ my %seen = map { $_ => 1 } @urls;
+ my $http = $ctx->{cgi}->base->as_string . $listname;
+ $seen{$http} or unshift @urls, $http;
+ if (scalar(@urls) == 1) {
+ $urls = "URL for <a\nhref=\"" . SSOMA_URL .
+ qq(">ssoma</a> or <b>git clone --mirror \$URL</b> :) .
+ $urls[0];
} else {
- $urls = "git URLs for <a\nhref=\"" . SSOMA_URL .
- "\">ssoma</a>:\n" . join("\n", map { "\t$_" } @urls);
+ $urls = "URLs for <a\nhref=\"" . SSOMA_URL .
+ qq(">ssoma</a> or <b>git clone --mirror \$URL</b>\n) .
+ join("\n", map { "\t$_" } @urls);
}
- my $addr = $pi_config->get($listname, 'address');
+ my $addr = $ctx->{pi_config}->get($listname, 'address');
if (ref($addr) eq 'ARRAY') {
$addr = $addr->[0]; # first address is primary
}
[ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ];
}
-sub msg_pfx {
- my ($ctx) = @_;
- my $href = PublicInbox::Hval::ascii_html(uri_escape_utf8($ctx->{mid}));
- "../f/$href.html";
-}
-
-# /$LISTNAME/t/$MESSAGE_ID.mbox.gz -> search results as gzipped mbox
+# /$LISTNAME/$MESSAGE_ID/t.mbox -> thread as mbox
+# /$LISTNAME/$MESSAGE_ID/t.mbox.gz -> thread as gzipped mbox
# note: I'm not a big fan of other compression formats since they're
# significantly more expensive on CPU than gzip and less-widely available,
# especially on older systems. Stick to zlib since that's what git uses.
sub get_thread_mbox {
- my ($ctx, $cgi) = @_;
+	# $sfx is '.gz' when msg_page dispatches "t.mbox.gz" and is not
+	# passed at all for plain "t.mbox"
+	my ($ctx, $sfx) = @_;
my $srch = searcher($ctx) or return need_search($ctx);
require PublicInbox::Mbox;
- PublicInbox::Mbox::thread_mbox($ctx, $srch);
+	return PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx);
+}
+
+
+# /$LISTNAME/$MESSAGE_ID/t.atom -> thread as Atom feed
+sub get_thread_atom {
+	my $ctx = shift;
+	return need_search($ctx) unless searcher($ctx);
+
+	# record the URL of this request in the context before the feed
+	# is generated
+	my $uri = $ctx->{cgi}->uri;
+	$ctx->{self_url} = $uri->as_string;
+	require PublicInbox::Feed;
+	return PublicInbox::Feed::generate_thread_atom($ctx);
+}
+
+# Fallback router, reached from call() when no current-style URL matched.
+# Maps the older /$LISTNAME/{m,f,t}/... layouts onto the current
+# /$LISTNAME/$MESSAGE_ID/... scheme with 301 redirects, and serves
+# Message-IDs that contain literal slashes.  Returns the response of
+# r301 or msg_page, or a plain 404 when nothing matches.
+sub legacy_redirects {
+	my ($ctx, $path_info) = @_;
+
+	# single-message pages
+	if ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) {
+		r301($ctx, $1, $2);
+	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) {
+		r301($ctx, $1, $2, 'raw');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) {
+		r301($ctx, $1, $2, 'f/');
+
+	# thread display
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
+		r301($ctx, $1, $2, 't/#u');
+
+	# the optional ".gz" sits inside the capture group, so $3 is ''
+	# rather than undef when it is absent
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox((?:\.gz)?)\z!o) {
+		r301($ctx, $1, $2, "t.mbox$3");
+
+	# even older legacy redirects
+	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
+		r301($ctx, $1, $2);
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
+		r301($ctx, $1, $2, 't/#u');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
+		r301($ctx, $1, $2, 'f/');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\.txt\z!o) {
+		r301($ctx, $1, $2, 'raw');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) {
+		r301($ctx, $1, $2, "t$3");
+
+	# legacy convenience redirects, order still matters
+	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\z!o) {
+		r301($ctx, $1, $2);
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\z!o) {
+		r301($ctx, $1, $2, 't/#u');
+	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\z!o) {
+		r301($ctx, $1, $2, 'f/');
+
+	# some Message-IDs have slashes in them and the HTTP server
+	# may try to be clever and unescape them :<
+	} elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) {
+		msg_page($ctx, $1, $2, $3);
+
+	# in case people leave off the trailing slash.  The suffix must gain
+	# its slash here (same rule as the equivalent branch in call());
+	# redirecting to a target that still lacks it would match this
+	# branch again whenever the server unescapes the Message-ID.
+	} elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/(f|T|t|R)\z!o) {
+		my ($listname, $mid, $suffix) = ($1, $2, $3);
+		$suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/';
+		r301($ctx, $listname, $mid, $suffix);
+	} else {
+		r404();
+	}
+}
+
+# Builds a 301 response pointing at
+#   <base URL>$listname/[<escaped $mid>/][$suffix][?<query string>][#fragment]
+# $mid and $suffix are optional.  $suffix may end in a "#fragment" (callers
+# pass e.g. 't/#u'); the request's query string is carried over.
+# Returns a [ status, headers, body ] array ref.
+sub r301 {
+	my ($ctx, $listname, $mid, $suffix) = @_;
+	my $cgi = $ctx->{cgi};
+	my $qs = $cgi->env->{QUERY_STRING};
+	my $url = $cgi->base->as_string . $listname . '/';
+	$url .= (uri_escape_utf8($mid) . '/') if (defined $mid);
+
+	# A fragment has to be the last component of a URL.  Peel it off
+	# the suffix so the query string can be placed in front of it
+	# instead of ending up inside the fragment.
+	my $frag = '';
+	if (defined $suffix) {
+		$frag = $1 if $suffix =~ s/(#.*)\z//s;
+		$url .= $suffix;
+	}
+
+	# guard against an env without QUERY_STRING so that the comparison
+	# does not emit an "uninitialized value" warning
+	$url .= "?$qs" if defined $qs && $qs ne '';
+	$url .= $frag;
+
+	[ 301,
+	  [ Location => $url, 'Content-Type' => 'text/plain' ],
+	  [ "Redirecting to $url\n" ] ]
+}
+
+# Dispatches a per-message URL.  $e is the part of the path after the
+# Message-ID (one of the alternatives of $END_RE).  An unknown list or an
+# unrecognized $e ends in r404($ctx).
+sub msg_page {
+	my ($ctx, $list, $mid, $e) = @_;
+	return r404($ctx) if invalid_list_mid($ctx, $list, $mid);
+
+	my %page_for = (
+		''          => sub { get_mid_html($ctx) },
+		't/'        => sub { get_thread($ctx) },
+		't.atom'    => sub { get_thread_atom($ctx) },
+		't.mbox'    => sub { get_thread_mbox($ctx) },
+		't.mbox.gz' => sub { get_thread_mbox($ctx, '.gz') },
+		'T/'        => sub { get_thread($ctx, 1) },
+		'raw'       => sub { get_mid_txt($ctx) },
+		'f/'        => sub { get_full_html($ctx) },
+		'R/'        => sub { get_reply_html($ctx) },
+	);
+	my $page = $page_for{$e} or return r404($ctx);
+	$page->();
+}
+
+# Thin wrapper: hands the request object, the list's PublicInbox::Git object
+# and the requested path to PublicInbox::GitHTTPBackend::serve.
+sub serve_git {
+	my ($cgi, $git, $path) = @_;
+	return PublicInbox::GitHTTPBackend::serve($cgi, $git, $path);
}
1;