-# Copyright (C) 2014-2019 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Main web interface for mailing list archives
# - Must not rely on static content
# - UTF-8 is only for user-content, 7-bit US-ASCII for us
package PublicInbox::WWW;
-use 5.010_001;
use strict;
-use warnings;
-use bytes (); # only for bytes::length
+use v5.10.1;
use PublicInbox::Config;
use PublicInbox::Hval;
use URI::Escape qw(uri_unescape);
use PublicInbox::MID qw(mid_escape);
-require PublicInbox::Git;
use PublicInbox::GitHTTPBackend;
use PublicInbox::UserContent;
+use PublicInbox::WwwStatic qw(r path_info_raw);
+use PublicInbox::Eml;
# TODO: consider a routing tree now that we have more endpoints:
# Patterns interpolated into the routing regexes (several are used in call()).
# Compared with the "-" lines, the "+" lines let $INBOX_RE accept a plus sign
# after the first character of the inbox name, add the d/ alternative to
# $END_RE, and drop the 40-digit upper bound from $OID_RE (7 or more hex digits).
-our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
+our $INBOX_RE = qr!\A/([\w\-][\w\.\-\+]*)!;
our $MID_RE = qr!([^/]+)!;
-our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+our $END_RE = qr!(T/|t/|d/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
-our $OID_RE = qr![a-f0-9]{7,40}!;
+our $OID_RE = qr![a-f0-9]{7,}!;
# Constructor.  The config argument is optional.
# "+" lines: a PublicInbox::Config->new is created only when the argument is
# undefined (defined-or), and the object is stored under the pi_cfg key.
# "-" lines: the fallback used ||= and the key was pi_config.
sub new {
- my ($class, $pi_config) = @_;
- $pi_config ||= PublicInbox::Config->new;
- bless { pi_config => $pi_config }, $class;
+ my ($class, $pi_cfg) = @_;
+ bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class;
}
# backwards compatibility, do not use
PublicInbox::WWW->new->call($req->env);
}
# Every line of this block carries a "-" marker.
# path_re() memoized, keyed by SCRIPT_NAME, a compiled regex that captures the
# path following the script name (optionally preceded by an http(s)://host
# prefix) up to the first question mark or hash sign.  Before compiling, the
# script name gets a leading slash if it lacks one and loses a trailing slash.
-my %path_re_cache;
-
-sub path_re ($) {
- my $sn = $_[0]->{SCRIPT_NAME};
- $path_re_cache{$sn} ||= do {
- $sn = '/'.$sn unless index($sn, '/') == 0;
- $sn =~ s!/\z!!;
- qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!;
- };
-}
-
# Request entry point: builds the per-request $ctx (env + www), decodes
# QUERY_STRING into $ctx->{qp}, then dispatches on REQUEST_METHOD and the
# request path.  Methods other than POST, GET and HEAD get a 405 response.
# NOTE(review): this listing is diff output and the elsif chain is not
# contiguous everywhere (for example, the capture variables used right after
# the POST check have no visible match) -- read it alongside the full file.
sub call {
my ($self, $env) = @_;
my $ctx = { env => $env, www => $self };
%{$ctx->{qp}} = map {
utf8::decode($_);
tr/+/ /;
- my ($k, $v) = split('=', $_, 2);
- $v = uri_unescape($v // '');
+ my ($k, $v) = split(/=/, $_, 2);
# none of the keys we care about will need escaping
- $k => $v;
+ ($k // '', uri_unescape($v // ''))
} split(/[&;]+/, $env->{QUERY_STRING});
- # avoiding $env->{PATH_INFO} here since that's already decoded
- my ($path_info) = ($env->{REQUEST_URI} =~ path_re($env));
- $path_info //= $env->{PATH_INFO};
+ my $path_info = path_info_raw($env);
my $method = $env->{REQUEST_METHOD};
if ($method eq 'POST') {
my ($epoch, $path) = ($2, $3);
return invalid_inbox($ctx, $1) ||
serve_git($ctx, $epoch, $path);
+ } elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
+ return get_altid_dump($ctx, $1, $2);
+ } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) {
+ my ($idx, $fn) = ($3, $4);
+ return invalid_inbox_mid($ctx, $1, $2) ||
+ get_attach($ctx, $idx, $fn);
} elsif ($path_info =~ m!$INBOX_RE/!o) {
return invalid_inbox($ctx, $1) || mbox_results($ctx);
}
}
- elsif ($method !~ /\AGET|HEAD\z/) {
- return r(405, 'Method Not Allowed');
+ elsif ($method !~ /\A(?:GET|HEAD)\z/) {
+ return r(405);
}
# top-level indices and feeds
- if ($path_info eq '/' || $path_info eq '/manifest.js.gz') {
- www_listing($self)->call($env);
+ if ($path_info eq '/') {
+ require PublicInbox::WwwListing;
+ PublicInbox::WwwListing->response($ctx);
+ } elsif ($path_info eq '/manifest.js.gz') {
+ require PublicInbox::ManifestJsGz;
+ PublicInbox::ManifestJsGz->response($ctx);
} elsif ($path_info =~ m!$INBOX_RE\z!o) {
invalid_inbox($ctx, $1) || r301($ctx, $1);
} elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) {
invalid_inbox($ctx, $1) || get_atom($ctx);
} elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
invalid_inbox($ctx, $1) || get_new($ctx);
+ } elsif ($path_info =~ m!$INBOX_RE/description\z!o) {
+ get_description($ctx, $1);
} elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
my ($epoch, $path) = ($2, $3);
get_vcs_object($ctx, $1, $2, $3);
} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
r301($ctx, $1, $2, 's/');
+ } elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
+ get_altid_dump($ctx, $1, $2);
# convenience redirects order matters
} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
r301($ctx, $1, $2);
-
+ } elsif ($path_info =~ m!\A/\+/([a-zA-Z0-9_\-\.]+)\.css\z!) {
+ get_css($ctx, undef, $1); # for WwwListing
} else {
legacy_redirects($ctx, $path_info);
}
}
-# for CoW-friendliness, MOOOOO!
+# for CoW-friendliness, MOOOOO! Even for single-process setups,
+# we want to get all immortal allocations done early to avoid heap
+# fragmentation since common allocators favor a large contiguous heap.
# Loads the modules listed below ahead of time.  The Xapian load and the
# SearchView/MboxGz/WwwAltId loads are wrapped in eval, so a failure there
# does not abort preload.  When invoked on an object (ref($self) is true) it
# also calls cgit, stylesheets_prepare and news_www on it; the "+" lines
# additionally call limiter('-cgit') when publicinbox.cgitrc is defined and
# load PublicInbox::Isearch when $pi_cfg->ALL is true.
sub preload {
my ($self) = @_;
+
+ # populate caches used by Encode internally, since emails
+ # may show up with any encoding.
+ require Encode;
+ Encode::find_encoding($_) for Encode->encodings(':all');
+
+ require PublicInbox::ExtMsg;
require PublicInbox::Feed;
require PublicInbox::View;
require PublicInbox::SearchThread;
- require PublicInbox::MIME;
- require Digest::SHA;
- require POSIX;
+ require PublicInbox::Eml;
+ require PublicInbox::Mbox;
+ require PublicInbox::ViewVCS;
+ require PublicInbox::WwwText;
+ require PublicInbox::WwwAttach;
eval {
require PublicInbox::Search;
PublicInbox::Search::load_xapian();
};
- foreach (qw(PublicInbox::SearchView
- PublicInbox::Mbox IO::Compress::Gzip
- PublicInbox::NewsWWW)) {
- eval "require $_;";
+ for (qw(SearchView MboxGz WwwAltId)) {
+ eval "require PublicInbox::$_;";
}
if (ref($self)) {
+ my $pi_cfg = $self->{pi_cfg};
+ if (defined($pi_cfg->{'publicinbox.cgitrc'})) {
+ $pi_cfg->limiter('-cgit');
+ }
+ $pi_cfg->ALL and require PublicInbox::Isearch;
$self->cgit;
$self->stylesheets_prepare($_) for ('', '../', '../../');
- $self->www_listing;
+ $self->news_www;
}
}
require PublicInbox::ExtMsg;
return PublicInbox::ExtMsg::ext_msg($ctx);
}
- r(404, 'Not Found');
+ r(404);
}
-# simple response for errors
# Marked "-" here; the use line for PublicInbox::WwwStatic near the top of
# this listing imports r, so the local definition is dropped.
-sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
-
# Tries several handlers in turn, moving on while the response is a 404,
# and returns the last response obtained.
# "+" lines: (1) news_www; (2) on a 404, when publicinbox.cgit is set to
# anything other than first (the result is cached in {cgit_fallback}),
# coderepo->srv; (3) if the result is still an array-ref 404, cgit;
# (4) if that is still a 404 and cgit_fallback is false, coderepo->srv.
# "-" lines: only news_www followed by cgit.
sub news_cgit_fallback ($) {
my ($ctx) = @_;
- my $www = $ctx->{www};
- my $env = $ctx->{env};
- my $res = $www->news_www->call($env);
- $res->[0] == 404 ? $www->cgit->call($env) : $res;
+ my $res = $ctx->{www}->news_www->call($ctx->{env});
+
+ $res->[0] == 404 and ($ctx->{www}->{cgit_fallback} //= do {
+ my $c = $ctx->{www}->{pi_cfg}->{'publicinbox.cgit'} // 'first';
+ $c ne 'first' # `fallback' and `rewrite' => true
+ } // 0) and $res = $ctx->{www}->coderepo->srv($ctx);
+
+ ref($res) eq 'ARRAY' && $res->[0] == 404 and
+ $res = $ctx->{www}->cgit->call($ctx->{env}, $ctx);
+
+ ref($res) eq 'ARRAY' && $res->[0] == 404 &&
+ !$ctx->{www}->{cgit_fallback} and
+ $res = $ctx->{www}->coderepo->srv($ctx);
+ $res;
}
# returns undef if valid, array ref response if invalid
sub invalid_inbox ($$) {
my ($ctx, $inbox) = @_;
- my $ibx = $ctx->{www}->{pi_config}->lookup_name($inbox);
+ my $ibx = $ctx->{www}->{pi_cfg}->lookup_name($inbox) //
+ $ctx->{www}->{pi_cfg}->lookup_ei($inbox);
if (defined $ibx) {
- $ctx->{-inbox} = $ibx;
+ $ctx->{ibx} = $ibx;
return;
}
return $ret if $ret;
my $mid = $ctx->{mid} = uri_unescape($mid_ue);
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) {
my ($x2, $x38) = ($1, $2);
# this is horrifically wasteful for legacy URLs:
- my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
- require Email::Simple;
- my $s = Email::Simple->new($str);
- $mid = PublicInbox::MID::mid_clean($s->header('Message-ID'));
+ my $str = $ctx->{ibx}->msg_by_path("$x2/$x38") or return;
+ my $s = PublicInbox::Eml->new($str);
+ $mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
return r301($ctx, $inbox, mid_escape($mid));
}
undef;
# Returns whatever PublicInbox::Mbox::emit_raw produces for $ctx; when that
# is false a 404 response is returned instead (r404($ctx) on the "-" line,
# r(404) on the "+" line).
sub get_mid_txt {
my ($ctx) = @_;
require PublicInbox::Mbox;
- PublicInbox::Mbox::emit_raw($ctx) || r404($ctx);
+ PublicInbox::Mbox::emit_raw($ctx) || r(404);
}
# /$INBOX/$MESSAGE_ID/ -> HTML content (short quotes)
# /$INBOX/$MESSAGE_ID/t/
# $flat is stored in $ctx->{flat} before PublicInbox::View::thread_html runs.
# When the inbox's over method returns false, the need($ctx, 'Overview')
# response is returned instead.
sub get_thread {
my ($ctx, $flat) = @_;
- $ctx->{-inbox}->over or return need($ctx, 'Overview');
+ $ctx->{ibx}->over or return need($ctx, 'Overview');
$ctx->{flat} = $flat;
require PublicInbox::View;
PublicInbox::View::thread_html($ctx);
}
# show git objects (blobs and commits)
-# /$INBOX/_/$OBJECT_ID/show
-# /$INBOX/_/${OBJECT_ID}_${FILENAME}
-# KEY may contain slashes
+# /$INBOX/$GIT_OBJECT_ID/s/
+# /$INBOX/$GIT_OBJECT_ID/s/$FILENAME
# Validates $inbox first and returns the invalid_inbox response if there is
# one.  The "+" line also returns r(404) when repo_objs() returns false for
# this inbox.  Otherwise $oid and $filename go to PublicInbox::ViewVCS::show.
sub get_vcs_object ($$$;$) {
my ($ctx, $inbox, $oid, $filename) = @_;
my $r404 = invalid_inbox($ctx, $inbox);
return $r404 if $r404;
+ return r(404) if !$ctx->{www}->{pi_cfg}->repo_objs($ctx->{ibx});
require PublicInbox::ViewVCS;
PublicInbox::ViewVCS::show($ctx, $oid, $filename);
}
# Handler for the .sql.gz route matched in call(): validates $inbox, then
# delegates to PublicInbox::WwwAltId::sqldump with $altid_pfx.  If that
# module cannot be loaded, the need($ctx, 'sqlite3') response is returned.
+sub get_altid_dump {
+ my ($ctx, $inbox, $altid_pfx) =@_;
+ my $r404 = invalid_inbox($ctx, $inbox);
+ return $r404 if $r404;
+ eval { require PublicInbox::WwwAltId } or return need($ctx, 'sqlite3');
+ PublicInbox::WwwAltId::sqldump($ctx, $altid_pfx);
+}
+
# Builds the HTML response saying that $extra is not available, status 501.
# "+" lines: delegate to PublicInbox::WwwStream::html_oneshot.
# "-" lines: assembled the [status, headers, body] array by hand.
sub need {
my ($ctx, $extra) = @_;
- my $msg = <<EOF;
-<html><head><title>$extra not available for this
-public-inbox</title><body><pre>$extra is not available for this public-inbox
-<a href="../">Return to index</a></pre></body></html>
+ require PublicInbox::WwwStream;
+ PublicInbox::WwwStream::html_oneshot($ctx, 501, <<EOF);
+<pre>$extra is not available for this public-inbox
+<a\nhref="../">Return to index</a></pre>
EOF
- [ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ];
}
# /$INBOX/$MESSAGE_ID/t.mbox -> thread as mbox
# especially on older systems. Stick to zlib since that's what git uses.
# Passes the inbox's over object and $sfx to PublicInbox::Mbox::thread_mbox.
# When over returns false, the need($ctx, 'Overview') response is returned.
sub get_thread_mbox {
my ($ctx, $sfx) = @_;
- my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview');
+ my $over = $ctx->{ibx}->over or return need($ctx, 'Overview');
require PublicInbox::Mbox;
PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx);
}
# /$INBOX/$MESSAGE_ID/t.atom -> thread as Atom feed
# Delegates to PublicInbox::Feed::generate_thread_atom.  When the inbox's
# over method returns false, the need($ctx, 'Overview') response is returned.
sub get_thread_atom {
my ($ctx) = @_;
- $ctx->{-inbox}->over or return need($ctx, 'Overview');
+ $ctx->{ibx}->over or return need($ctx, 'Overview');
require PublicInbox::Feed;
PublicInbox::Feed::generate_thread_atom($ctx);
}
sub r301 {
my ($ctx, $inbox, $mid_ue, $suffix) = @_;
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
unless ($ibx) {
my $r404 = invalid_inbox($ctx, $inbox);
return $r404 if $r404;
- $ibx = $ctx->{-inbox};
+ $ibx = $ctx->{ibx};
}
my $url = $ibx->base_url($ctx->{env});
my $qs = $ctx->{env}->{QUERY_STRING};
# legacy, but no redirect for compatibility:
'f/' eq $e and return get_mid_html($ctx);
+ if ($e eq 'd/') {
+ require PublicInbox::View;
+ return PublicInbox::View::diff_msg($ctx);
+ }
r404($ctx);
}
# Selects $ibx->git_epoch($epoch) when $epoch is defined, otherwise $ibx->git,
# and serves $path through PublicInbox::GitHTTPBackend::serve.
# Returns r404() when no git object was obtained.
sub serve_git {
my ($ctx, $epoch, $path) = @_;
my $env = $ctx->{env};
- my $ibx = $ctx->{-inbox};
+ my $ibx = $ctx->{ibx};
my $git = defined $epoch ? $ibx->git_epoch($epoch) : $ibx->git;
$git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404();
}
sub mbox_results {
my ($ctx) = @_;
if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
- $ctx->{-inbox}->search or return need($ctx, 'search');
+ $ctx->{ibx}->isrch or return need($ctx, 'search');
require PublicInbox::SearchView;
return PublicInbox::SearchView::mbox_results($ctx);
}
# Lazily creates the PublicInbox::NewsWWW instance and caches it in
# $self->{news_www}.  The "+" line uses defined-or assignment, so only an
# undefined slot triggers construction.
sub news_www {
my ($self) = @_;
- $self->{news_www} ||= do {
+ $self->{news_www} //= do {
require PublicInbox::NewsWWW;
- PublicInbox::NewsWWW->new($self->{pi_config});
+ PublicInbox::NewsWWW->new($self->{pi_cfg});
}
}
# Lazily creates the cgit handler and caches it in $self->{cgit}: a
# PublicInbox::Cgit when publicinbox.cgitrc is defined, otherwise a
# Plack::Util::inline_object stub whose call() returns r404().
# In the "+" lines the stub is also used when PublicInbox::Cgit->new
# returns undef (the defined-or after the ternary).
sub cgit {
my ($self) = @_;
- $self->{cgit} ||= do {
- my $pi_config = $self->{pi_config};
-
- if (defined($pi_config->{'publicinbox.cgitrc'})) {
+ $self->{cgit} //=
+ (defined($self->{pi_cfg}->{'publicinbox.cgitrc'}) ? do {
require PublicInbox::Cgit;
- PublicInbox::Cgit->new($pi_config);
- } else {
+ PublicInbox::Cgit->new($self->{pi_cfg});
+ } : undef) // do {
require Plack::Util;
Plack::Util::inline_object(call => sub { r404() });
- }
- }
+ };
}
# The "-"/"+" pair replaces the www_listing accessor with coderepo.
# "+" lines: lazily create a PublicInbox::WwwCoderepo from $self->{pi_cfg}
# and cache it in $self->{coderepo}.
# "-" lines: did the same for PublicInbox::WwwListing in $self->{www_listing}.
-sub www_listing {
+sub coderepo {
my ($self) = @_;
- $self->{www_listing} ||= do {
- require PublicInbox::WwwListing;
- PublicInbox::WwwListing->new($self);
+ $self->{coderepo} //= do {
+ require PublicInbox::WwwCoderepo;
+ PublicInbox::WwwCoderepo->new($self->{pi_cfg});
}
}
my ($ctx, $inbox, $key) = @_;
my $r404 = invalid_inbox($ctx, $inbox);
return $r404 if $r404;
- require PublicInbox::WwwListing;
- PublicInbox::WwwListing::js($ctx->{env}, [$ctx->{-inbox}]);
+ require PublicInbox::ManifestJsGz;
+ PublicInbox::ManifestJsGz::per_inbox($ctx);
}
sub get_attach {
} || sub { $_[0] };
my $css_map = {};
- my $stylesheets = $self->{pi_config}->{css} || [];
+ my $stylesheets = $self->{pi_cfg}->{css} || [];
my $links = [];
my $inline_ok = 1;
};
}
-# /$INBOX/$KEY.css endpoint
+# /$INBOX/$KEY.css and /+/$KEY.css endpoints
# CSS is configured globally for all inboxes, but we access them on
# a per-inbox basis. This allows administrators to set up per-inbox
# static routes to intercept the request before it hits PSGI
+# inbox == undef => top-level WwwListing
# Looks up $key in the stylesheet map (preparing the map if it is not cached
# yet) and returns the CSS as a 200 text/css response with Content-Length,
# after passing the headers through cache_one_year().
# For a defined $inbox, a missing userContent key falls back to
# PublicInbox::UserContent::sample.  Returns r404() when no CSS is found.
# "+" lines: an undefined $inbox skips inbox validation.
sub get_css ($$$) {
my ($ctx, $inbox, $key) = @_;
- my $r404 = invalid_inbox($ctx, $inbox);
+ my $r404 = defined($inbox) ? invalid_inbox($ctx, $inbox) : undef;
return $r404 if $r404;
my $self = $ctx->{www};
- my $css_map = $self->{-css_map} || stylesheets_prepare($self, '');
+ my $css_map = $self->{-css_map} ||
+ stylesheets_prepare($self, defined($inbox) ? '' : '+/');
my $css = $css_map->{$key};
- if (!defined($css) && $key eq 'userContent') {
+ if (!defined($css) && defined($inbox) && $key eq 'userContent') {
my $env = $ctx->{env};
- $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+ $css = PublicInbox::UserContent::sample($ctx->{ibx}, $env);
}
defined $css or return r404();
- my $h = [ 'Content-Length', bytes::length($css),
- 'Content-Type', 'text/css' ];
+ my $h = [ 'Content-Length', length($css), 'Content-Type', 'text/css' ];
PublicInbox::GitHTTPBackend::cache_one_year($h);
[ 200, $h, [ $css ] ];
}
# Returns the inbox description plus a trailing newline as a 200 text/plain
# response, or the invalid_inbox response when $inbox does not validate.
# The string is passed through utf8::encode before length() is taken for
# the Content-Length header.
+sub get_description {
+ my ($ctx, $inbox) = @_;
+ invalid_inbox($ctx, $inbox) || do {
+ my $d = $ctx->{ibx}->description . "\n";
+ utf8::encode($d);
+ [ 200, [ 'Content-Length', length($d),
+ 'Content-Type', 'text/plain' ], [ $d ] ];
+ };
+}
+
# Takes the next entry off $self->{-low_prio_q} (returning when the queue is
# empty), requeues $self while entries remain, and resumes that entry by
# calling next_step on its psgix.io object with the entry's async_next method.
+sub event_step { # called via requeue
+ my ($self) = @_;
+ # gzf = PublicInbox::GzipFilter == $ctx
+ my $gzf = shift(@{$self->{-low_prio_q}}) // return;
+ PublicInbox::DS::requeue($self) if scalar(@{$self->{-low_prio_q}});
+ my $http = $gzf->{env}->{'psgix.io'}; # PublicInbox::HTTP
+ $http->next_step($gzf->can('async_next'));
+}
+
1;