# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
#
# We focus on the lowest common denominators here:
-# - targeted at text-only console browsers (lynx, w3m, etc..)
+# - targeted at text-only console browsers (w3m, links, etc..)
# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs
# - No JavaScript, graphics or icons allowed.
# - Must not rely on static content
# Link targets interpolated into the footer HTML built further down.
use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
use constant PI_URL => 'http://public-inbox.org/';
# Captures the list name from the first path component
# (word characters, dots and dashes only).
our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
# Captures a single path component (anything but "/"); run() uses it
# for the Message-ID part of the URL.
+our $MID_RE = qr!([^/]+)!;
# Captures the endpoint that follows the Message-ID.  The trailing empty
# alternative lets a bare "/$LISTNAME/$MESSAGE_ID/" match as well.
+our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
# Shared configuration object.  It is now created on the first call to
# run() instead of in a BEGIN block at compile time.
our $pi_config;
-BEGIN {
- $pi_config = PublicInbox::Config->new;
-}
# Entry point: dispatches one request.
#   $cgi    - request object, stored in the per-request context as {cgi}
#   $method - HTTP method; a non-matching method gets a 405 response
# The first matching route below decides which handler produces the
# response.  Comments describe the post-change (`+`) lines.
sub run {
my ($cgi, $method) = @_;
- my %ctx;
# The configuration is loaded on first use and then kept in $pi_config.
+ $pi_config ||= PublicInbox::Config->new;
# Per-request context hash ref handed to every handler.
+ my $ctx = { cgi => $cgi, pi_config => $pi_config };
# NOTE(review): alternation binds looser than the anchors, so this pattern
# is (\AGET)|(HEAD\z) and accepts e.g. "GETFOO" or "XHEAD";
# /\A(?:GET|HEAD)\z/ is probably what was intended.
if ($method !~ /\AGET|HEAD\z/) {
return r(405, 'Method Not Allowed');
}
# $path_info is assigned in lines that are not visible in this excerpt.
if ($path_info eq '/') {
r404();
# "/$LISTNAME" without a trailing slash: redirect to "/$LISTNAME/"
} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
- invalid_list(\%ctx, $1) || redirect_list_index(\%ctx, $cgi);
+ invalid_list($ctx, $1) || r301($ctx, $1);
} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
- invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi, 0);
- } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) {
- invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi, 0);
+ invalid_list($ctx, $1) || get_index($ctx);
# both the old "atom.xml" and the new "new.atom" names serve the feed
+ } elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
+ invalid_list($ctx, $1) || get_atom($ctx);
- # single-message pages
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx, $cgi);
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx, $cgi);
-
- # full-message page
- } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx, $cgi);
# "/$LISTNAME/$MESSAGE_ID/$ENDPOINT": msg_page() picks the handler from
# the captured endpoint ($3)
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
+ msg_page($ctx, $1, $2, $3);
- # convenience redirects, order matters
- } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || redirect_mid(\%ctx, $cgi);
+ # convenience redirects order matters
# a Message-ID of two or more characters without a trailing slash is
# redirected to its slash-terminated URL
+ } elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) {
+ r301($ctx, $1, $2);
} else {
- r404();
# anything else is checked against the old URL layouts before giving up
+ legacy_redirects($ctx, $path_info);
}
}
require PublicInbox::Feed;
require PublicInbox::View;
require PublicInbox::Thread;
+ require PublicInbox::GitCatFile;
require Email::MIME;
require Digest::SHA;
require POSIX;
- require XML::Atom::SimpleFeed;
+
+ eval {
+ require PublicInbox::Search;
+ require PublicInbox::Mbox;
+ require IO::Compress::Gzip;
+ };
}
# private functions below
# "Not Found" response.  When called with a context that carries a true
# {mid}, the response is delegated to PublicInbox::ExtMsg::ext_msg($ctx)
# (loaded on demand); otherwise the plain-text 404 from r() is produced.
# The $ctx argument is optional.
-sub r404 { r(404, 'Not Found') }
+sub r404 {
+ my ($ctx) = @_;
+ if ($ctx && $ctx->{mid}) {
+ require PublicInbox::ExtMsg;
+ return PublicInbox::ExtMsg::ext_msg($ctx);
+ }
+ r(404, 'Not Found');
+}
# simple response for errors
# Returns [ $status, [ headers ], [ body ] ] with a text/plain content type.
# The first argument is the status code; the body is every argument followed
# by "\n", all joined with single spaces, so r(404, 'Not Found') yields the
# body "404 Not Found \n" (note the space before the newline).
sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
# Validates the list name and records the requested Message-ID.
#   $ctx      - per-request context; {mid} is set on success
#   $listname - list name taken from the URL
#   $mid      - URI-escaped Message-ID taken from the URL
# Returns the true value produced by invalid_list() when validation fails
# (invalid_list is defined outside this excerpt; presumably an error
# response), or undef when the request may proceed.
# Comments describe the post-change (`+`) lines.
sub invalid_list_mid {
my ($ctx, $listname, $mid) = @_;
my $ret = invalid_list($ctx, $listname, $mid);
- $ctx->{mid} = uri_unescape($mid) unless $ret;
- $ret;
+ return $ret if $ret;
+
+ $ctx->{mid} = $mid = uri_unescape($mid);
# A value of exactly 40 lowercase hex digits is first looked up as-is via
# mid2blob(); if a blob is found, the Message-ID header parsed from that
# blob replaces {mid}.  Presumably this supports URLs that used a SHA-1
# hex digest in place of the Message-ID -- TODO confirm.
+ if ($mid =~ /\A[a-f0-9]{40}\z/) {
+ if ($mid = mid2blob($ctx)) {
+ require Email::Simple;
# NOTE(review): `use` runs at compile time, so this import happens
# regardless of the surrounding conditions.
+ use PublicInbox::MID qw/mid_clean/;
+ $mid = Email::Simple->new($mid);
+ $ctx->{mid} = mid_clean($mid->header('Message-ID'));
+ }
+ }
+ undef;
}
-# /$LISTNAME/atom.xml -> Atom feed, includes replies
+# /$LISTNAME/new.atom -> Atom feed, includes replies
# Handler for the list-wide Atom feed.
#   $ctx - per-request context
# After the change the response is no longer assembled here: the context is
# passed to PublicInbox::Feed::generate() and its return value is returned.
sub get_atom {
- my ($ctx, $cgi, $top) = @_;
+ my ($ctx) = @_;
require PublicInbox::Feed;
- [ 200, [ 'Content-Type' => 'application/xml' ],
- [ PublicInbox::Feed->generate({
- git_dir => $ctx->{git_dir},
- listname => $ctx->{listname},
- pi_config => $pi_config,
- cgi => $cgi,
- top => $top,
- }) ]
- ];
+ PublicInbox::Feed::generate($ctx);
}
# /$LISTNAME/?r=$GIT_COMMIT -> HTML only
# Handler for the HTML index of a list.
#   $ctx - per-request context
# After the change the response is produced by
# PublicInbox::Feed::generate_html_index($ctx); this sub only prepares the
# context beforehand.
sub get_index {
- my ($ctx, $cgi, $top) = @_;
+ my ($ctx) = @_;
require PublicInbox::Feed;
- [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::Feed->generate_html_index({
- git_dir => $ctx->{git_dir},
- listname => $ctx->{listname},
- pi_config => $pi_config,
- cgi => $cgi,
- footer => footer($ctx),
- top => $top,
- }) ]
- ];
# searcher() stores the search object in $ctx->{srch} when available.
# NOTE(review): $srch itself is not read again in this sub.
+ my $srch = searcher($ctx);
# called for its side effect: footer() stores its result in $ctx->{footer}
+ footer($ctx);
+ PublicInbox::Feed::generate_html_index($ctx);
}
# just returns a string ref for the blob in the current ctx
# Looks up the message blob for $ctx->{mid} in the git repository at
# $ctx->{git_dir}.
# Only part of this sub is visible in this excerpt: the body of the child
# branch and the code that reads from $fh and returns are elided, so the
# return value is not documented here beyond the comment above the sub.
sub mid2blob {
my ($ctx) = @_;
- require Digest::SHA;
- my $hex = Digest::SHA::sha1_hex($ctx->{mid});
- $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or
- die "BUG: not a SHA-1 hex: $hex";
-
# The path inside the tree is now computed by PublicInbox::MID::mid2path()
# instead of being derived from a SHA-1 digest in this sub.
+ require PublicInbox::MID;
+ my $path = PublicInbox::MID::mid2path($ctx->{mid});
my @cmd = ('git', "--git-dir=$ctx->{git_dir}",
- qw(cat-file blob), "HEAD:$1/$2");
- my $cmd = join(' ', @cmd);
+ qw(cat-file blob), "HEAD:$path");
# Two-argument open with '-|' forks; the parent reads the child's STDOUT
# through $fh, and $pid is 0 in the child.
my $pid = open my $fh, '-|';
defined $pid or die "fork failed: $!\n";
if ($pid == 0) {
}
}
-# /$LISTNAME/m/$MESSAGE_ID.txt -> raw original
+# /$LISTNAME/$MESSAGE_ID/raw -> raw mbox
# Handler for the raw view of a single message.
#   $ctx - per-request context with {mid} set
# Responds via r404($ctx) when mid2blob() finds nothing; otherwise the blob
# is handed to PublicInbox::Mbox::emit1() and its return value is returned.
sub get_mid_txt {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- $x ? [ 200, [ 'Content-Type' => 'text/plain' ], [ $$x ] ] : r404();
+ my ($ctx) = @_;
+ my $x = mid2blob($ctx) or return r404($ctx);
+ require PublicInbox::Mbox;
+ PublicInbox::Mbox::emit1($x);
}
-# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes)
+# /$LISTNAME/$MESSAGE_ID/ -> HTML content (short quotes)
# Handler for the HTML view of a single message.
#   $ctx - per-request context with {mid} set
# Returns a 200 text/html response whose body comes from
# PublicInbox::View::msg_html(), or the result of r404($ctx) when
# mid2blob() finds nothing.
sub get_mid_html {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- return r404() unless $x;
+ my ($ctx) = @_;
+ my $x = mid2blob($ctx) or return r404($ctx);
require PublicInbox::View;
- my $mid_href = PublicInbox::Hval::ascii_html(
- uri_escape_utf8($ctx->{mid}));
- my $pfx = "../f/$mid_href.html";
my $foot = footer($ctx);
require Email::MIME;
+ my $mime = Email::MIME->new($x);
# return value ignored: called so that $ctx->{srch} is set when search
# support is available
+ searcher($ctx);
[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::View->msg_html(Email::MIME->new($x), $pfx, $foot) ] ];
# 'f/' is presumably the relative link to the full-quote page of the same
# message -- confirm against PublicInbox::View::msg_html
+ [ PublicInbox::View::msg_html($ctx, $mime, 'f/', $foot) ] ];
}
-# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes)
+# /$LISTNAME/$MESSAGE_ID/f/ -> HTML content (fullquotes)
# Handler for the full-quote HTML view of a single message.
#   $ctx - per-request context with {mid} set
# Same flow as get_mid_html(), except that undef is passed to
# PublicInbox::View::msg_html() where get_mid_html() passes 'f/'.
# Returns a 200 text/html response, or the result of r404($ctx) when
# mid2blob() finds nothing.
sub get_full_html {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- return r404() unless $x;
+ my ($ctx) = @_;
+ my $x = mid2blob($ctx) or return r404($ctx);
+
require PublicInbox::View;
- require Email::MIME;
my $foot = footer($ctx);
+ require Email::MIME;
+ my $mime = Email::MIME->new($x);
# return value ignored: called so that $ctx->{srch} is set when search
# support is available
+ searcher($ctx);
[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::View->msg_html(Email::MIME->new($x), undef, $foot)] ];
+ [ PublicInbox::View::msg_html($ctx, $mime, undef, $foot)] ];
+}
+
+# /$LISTNAME/$MESSAGE_ID/t/
# Handler for the HTML thread view.
#   $ctx  - per-request context
#   $flat - optional; stored in $ctx->{flat} (msg_page() passes 1 for the
#           "T/" endpoint and nothing for "t/")
# Requires search support: responds via need_search($ctx) when searcher()
# returns nothing, otherwise returns whatever
# PublicInbox::View::thread_html() returns.
+sub get_thread {
+ my ($ctx, $flat) = @_;
+ my $srch = searcher($ctx) or return need_search($ctx);
+ require PublicInbox::View;
+ my $foot = footer($ctx);
+ $ctx->{flat} = $flat;
+ PublicInbox::View::thread_html($ctx, $foot, $srch);
}
# Returns the URL of the current request as a string.
# A CGI object is asked for ->self_url; any other request object is asked
# for ->uri->as_string.  The line that assigns $cgi is not visible in this
# excerpt; get_thread_atom() calls this sub with $ctx->{cgi}.
sub self_url {
ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string;
}
-sub redirect_list_index {
- my ($ctx, $cgi) = @_;
- do_redirect(self_url($cgi) . "/");
-}
-
-sub redirect_mid {
- my ($ctx, $cgi) = @_;
- my $url = self_url($cgi);
- $url =~ s!/f/!/m/!;
- do_redirect($url . '.html');
-}
-
-sub do_redirect {
- my ($url) = @_;
- [ 301,
- [ Location => $url, 'Content-Type' => 'text/plain' ],
- [ "Redirecting to $url\n" ]
- ]
-}
-
# Fetches a required value from the context.
#   $ctx - context hash ref
#   $key - key to read
# Returns $ctx->{$key}; dies with a "BUG: bad ctx" message when the value is
# undefined or the empty string.  The `+` line spells the emptiness check as
# a string comparison instead of length().
sub ctx_get {
my ($ctx, $key) = @_;
my $val = $ctx->{$key};
- (defined $val && length $val) or die "BUG: bad ctx, $key unusable\n";
+ (defined $val && $val ne '') or die "BUG: bad ctx, $key unusable\n";
$val;
}
my $footer = try_cat("$git_dir/public-inbox/footer.html");
if (defined $footer) {
chomp $footer;
+ $ctx->{footer} = $footer;
return $footer;
}
if ($nurls == 0) {
$urls = '($GIT_DIR/cloneurl missing)';
} elsif ($nurls == 1) {
- $urls = 'git archive URL for <a href="' . SSOMA_URL .
+ $urls = "git URL for <a\nhref=\"" . SSOMA_URL .
'">ssoma</a>: ' . $urls[0];
} else {
- $urls = 'git archive URLs for <a href="' . SSOMA_URL .
+ $urls = "git URLs for <a\nhref=\"" . SSOMA_URL .
"\">ssoma</a>:\n" . join("\n", map { "\t$_" } @urls);
}
$addr = $addr->[0]; # first address is primary
}
- $addr = "<a href=\"mailto:$addr\">$addr</a>";
- $desc = $desc;
- join("\n",
+ $addr = "<a\nhref=\"mailto:$addr\">$addr</a>";
+
+ $ctx->{footer} = join("\n",
'- ' . $desc,
- 'This is a <a href="' . PI_URL . '">public-inbox</a>, '.
- "anybody may post:",
- "\t$addr (text-only, no HTML please)",
+ "A <a\nhref=\"" . PI_URL . '">public-inbox</a>, ' .
+ 'anybody may post in plain-text (not HTML):',
+ $addr,
$urls
);
}
+# search support is optional, returns undef if Xapian is not installed
+# or not configured for the given GIT_DIR
# Tries to create a PublicInbox::Search object for $ctx->{git_dir}.
#   $ctx - per-request context; {srch} is set when construction succeeds
# Returns the value of the eval block: the new search object on success, or
# undef when loading the module or constructing the object dies.  The error
# in $@ is not inspected.
+sub searcher {
+ my ($ctx) = @_;
+ eval {
+ require PublicInbox::Search;
+ $ctx->{srch} = PublicInbox::Search->new($ctx->{git_dir});
+ };
+}
+
# Response used by handlers that cannot work without search support.
# Returns a 501 text/html response with a fixed message and a link back to
# "../".  $ctx is accepted but not used in the body.
# NOTE(review): the markup opens <head> but never closes it before <body>.
+sub need_search {
+ my ($ctx) = @_;
+ my $msg = <<EOF;
+<html><head><title>Search not available for this
+public-inbox</title><body><pre>Search is not available for this public-inbox
+<a href="../">Return to index</a></pre></body></html>
+EOF
+ [ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ];
+}
+
+# /$LISTNAME/$MESSAGE_ID/t.mbox -> thread as mbox
+# /$LISTNAME/$MESSAGE_ID/t.mbox.gz -> thread as gzipped mbox
+# note: I'm not a big fan of other compression formats since they're
+# significantly more expensive on CPU than gzip and less widely available,
+# especially on older systems. Stick to zlib since that's what git uses.
# Handler for downloading a thread as an mbox.
#   $ctx - per-request context
#   $sfx - optional suffix; msg_page() passes '.gz' for "t.mbox.gz" and
#          nothing for "t.mbox"
# Requires search support: responds via need_search($ctx) when searcher()
# returns nothing, otherwise returns whatever
# PublicInbox::Mbox::thread_mbox() returns.
+sub get_thread_mbox {
+ my ($ctx, $sfx) = @_;
+ my $srch = searcher($ctx) or return need_search($ctx);
+ require PublicInbox::Mbox;
+ PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx);
+}
+
+
+# /$LISTNAME/$MESSAGE_ID/t.atom -> thread as Atom feed
# Handler for the Atom feed of a single thread.
#   $ctx - per-request context
# Requires search support: responds via need_search($ctx) when searcher()
# returns nothing.  Otherwise the request URL is stored in $ctx->{self_url}
# and the result of PublicInbox::Feed::generate_thread_atom($ctx) is
# returned.
+sub get_thread_atom {
+ my ($ctx) = @_;
+ searcher($ctx) or return need_search($ctx);
+ $ctx->{self_url} = self_url($ctx->{cgi});
+ require PublicInbox::Feed;
+ PublicInbox::Feed::generate_thread_atom($ctx);
+}
+
# Fallback router, called by run() for paths no current route matched.
#   $ctx       - per-request context
#   $path_info - request path
# Old-style URLs with an m/, f/ or t/ directory before the Message-ID are
# answered with a 301 (via r301) to the current layout, where the Message-ID
# comes first and the view is a suffix.  A Message-ID that itself contains a
# slash is dispatched directly through msg_page().  Everything else gets a
# plain 404.  The patterns overlap, so the branch order is significant.
+sub legacy_redirects {
+ my ($ctx, $path_info) = @_;
+
+ # single-message pages
+ if ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) {
+ r301($ctx, $1, $2);
+ } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) {
+ r301($ctx, $1, $2, 'raw');
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) {
+ r301($ctx, $1, $2, 'f/');
+
+ # thread display
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
+ r301($ctx, $1, $2, 't/#u');
+
# NOTE(review): when the optional (\.gz)? group does not take part in the
# match, $3 is undef; it interpolates as an empty string, with an
# "uninitialized value" warning if warnings are enabled.
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) {
+ r301($ctx, $1, $2, "t.mbox$3");
+
+ # even older legacy redirects
+ } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
+ r301($ctx, $1, $2);
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
+ r301($ctx, $1, $2, 't/#u');
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
+ r301($ctx, $1, $2, 'f/');
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\.txt\z!o) {
+ r301($ctx, $1, $2, 'raw');
+
# here $3 always matched: it is ".mbox" or ".mbox.gz"
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) {
+ r301($ctx, $1, $2, "t$3");
+
+ # legacy convenience redirects, order still matters
+ } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\z!o) {
+ r301($ctx, $1, $2);
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\z!o) {
+ r301($ctx, $1, $2, 't/#u');
+ } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\z!o) {
+ r301($ctx, $1, $2, 'f/');
+
+ # some Message-IDs have slashes in them and the HTTP server
+ # may try to be clever and unescape them :<
+ } elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) {
+ msg_page($ctx, $1, $2, $3);
+
+ } else {
# called without $ctx, so r404() takes its plain-text path
+ r404();
+ }
+}
+
# Builds a 301 redirect response.
#   $ctx      - per-request context; {cgi} supplies the base URL
#   $listname - list name, always appended followed by "/"
#   $mid      - optional Message-ID; appended URI-escaped, followed by "/"
#   $suffix   - optional endpoint appended verbatim (e.g. 'raw', 'f/')
# Returns [ 301, [ Location and Content-Type headers ], [ text body ] ].
+sub r301 {
+ my ($ctx, $listname, $mid, $suffix) = @_;
+ my $cgi = $ctx->{cgi};
+ my $url;
# A CGI object gets "/" appended to its base URL; for other request
# objects ->base->as_string is used unchanged (presumably it already ends
# in "/" -- confirm).
+ if (ref($cgi) eq 'CGI') {
+ $url = $cgi->url(-base) . '/';
+ } else {
+ $url = $cgi->base->as_string;
+ }
+
+ $url .= $listname . '/';
+ $url .= (uri_escape_utf8($mid) . '/') if (defined $mid);
+ $url .= $suffix if (defined $suffix);
+
+ [ 301,
+ [ Location => $url, 'Content-Type' => 'text/plain' ],
+ [ "Redirecting to $url\n" ] ]
+}
+
# Dispatches a per-message URL to the handler for its endpoint.
#   $ctx  - per-request context
#   $list - list name from the URL
#   $mid  - Message-ID from the URL (still URI-escaped)
#   $e    - endpoint suffix: '', 't/', 'T/', 't.atom', 't.mbox',
#           't.mbox.gz', 'raw' or 'f/'
# Returns the chosen handler's response, or r404($ctx) when the endpoint is
# not one of the above.
# NOTE(review): when invalid_list_mid() returns a true value, that value is
# discarded and r404($ctx) is returned instead.
+sub msg_page {
+ my ($ctx, $list, $mid, $e) = @_;
+ unless (invalid_list_mid($ctx, $list, $mid)) {
+ '' eq $e and return get_mid_html($ctx);
+ 't/' eq $e and return get_thread($ctx);
+ 't.atom' eq $e and return get_thread_atom($ctx);
+ 't.mbox' eq $e and return get_thread_mbox($ctx);
+ 't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz');
+ 'T/' eq $e and return get_thread($ctx, 1);
+ 'raw' eq $e and return get_mid_txt($ctx);
+ 'f/' eq $e and return get_full_html($ctx);
+ }
+ r404($ctx);
+}
+
1;