This should allow us to more-easily test with Plack.
.gitignore
COPYING
+Documentation/dc-dlvr-spam-flow.txt
Documentation/design_notes.txt
+Documentation/design_www.txt
+Documentation/include.mk
+Documentation/public-inbox-mda.txt
+Documentation/txt2pre
+HACKING
INSTALL
MANIFEST
Makefile.PL
README
+TODO
+examples/README
+examples/apache2_cgi.conf
+examples/apache2_perl.conf
+examples/cgi-webrick.rb
examples/public-inbox-config
-lib/PublicInbox/MDA.pm
+examples/public-inbox.psgi
lib/PublicInbox/Config.pm
lib/PublicInbox/Feed.pm
lib/PublicInbox/Filter.pm
lib/PublicInbox/GitCatFile.pm
lib/PublicInbox/Hval.pm
+lib/PublicInbox/MDA.pm
lib/PublicInbox/View.pm
+lib/PublicInbox/WWW.pm
+public-inbox-learn
public-inbox-mda
+public-inbox.cgi
sa_config/Makefile
sa_config/README
sa_config/root/etc/spamassassin/public-inbox.pre
sa_config/user/.spamassassin/user_prefs
scripts/dc-dlvr
-scripts/import_gmane_spool
+scripts/dc-dlvr.pre
+scripts/edit-sa-prefs
+scripts/import_maildir
scripts/report-spam
+scripts/slrnspool2maildir
+t/cgi.t
t/config.t
t/fail-bin/spamc
t/feed.t
t/filter.t
+t/html_index.t
t/main-bin/spamc
t/mda.t
+t/plack.t
t/precheck.t
+t/utf8.mbox
t/view.t
--- /dev/null
+#!/usr/bin/perl -w
+# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Note: this is part of our test suite, update t/plack.t if this changes
+use strict;
+use warnings;
+require PublicInbox::WWW;
+require Plack::Request;
+sub {
+ my $req = Plack::Request->new(@_);
+ PublicInbox::WWW::run($req, $req->method);
+};
--- /dev/null
+# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# We focus on the lowest common denominators here:
+# - targeted at text-only console browsers (lynx, w3m, etc..)
+# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs
+# - No JavaScript, graphics or icons allowed.
+# - Must not rely on static content
+# - UTF-8 is only for user-content, 7-bit US-ASCII for us
+package PublicInbox::WWW;
+use 5.008;
+use strict;
+use warnings;
+use PublicInbox::Config;
+use URI::Escape qw(uri_escape_utf8 uri_unescape);
+our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
+our $pi_config;
+BEGIN {
+ $pi_config = PublicInbox::Config->new;
+}
+
+sub run {
+ my ($cgi, $method) = @_;
+ my %ctx;
+ if ($method !~ /\AGET|HEAD\z/) {
+ return r(405, 'Method Not Allowed');
+ }
+ my $path_info = $cgi->path_info;
+
+ # top-level indices and feeds
+ if ($path_info eq '/') {
+ r404();
+ } elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
+ invalid_list(\%ctx, $1) || redirect_list_index(\%ctx, $cgi);
+ } elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
+ invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi, 0);
+ } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) {
+ invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi, 0);
+
+ # single-message pages
+ } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) {
+ invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx, $cgi);
+ } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
+ invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx, $cgi);
+
+ # full-message page
+ } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
+ invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx, $cgi);
+
+ # convenience redirects, order matters
+ } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\z!o) {
+ invalid_list_mid(\%ctx, $1, $2) || redirect_mid(\%ctx, $cgi);
+
+ } else {
+ r404();
+ }
+}
+
+# for CoW-friendliness, MOOOOO!
+sub preload {
+ require PublicInbox::Feed;
+ require PublicInbox::View;
+ require Mail::Thread;
+ require Email::MIME;
+ require Digest::SHA;
+ require POSIX;
+ require XML::Atom::SimpleFeed;
+}
+
+# private functions below
+
+sub r404 { r(404, 'Not Found') }
+
+# simple response for errors
+sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
+
+# returns undef if valid, array ref response if invalid
+sub invalid_list {
+ my ($ctx, $listname) = @_;
+ my $git_dir = $pi_config->get($listname, "mainrepo");
+ if (defined $git_dir) {
+ $ctx->{git_dir} = $git_dir;
+ $ctx->{listname} = $listname;
+ return;
+ }
+ r404();
+}
+
+# returns undef if valid, array ref response if invalid
+sub invalid_list_mid {
+ my ($ctx, $listname, $mid) = @_;
+ my $ret = invalid_list($ctx, $listname, $mid);
+ $ctx->{mid} = uri_unescape($mid) unless $ret;
+ $ret;
+}
+
+# /$LISTNAME/atom.xml -> Atom feed, includes replies
+sub get_atom {
+ my ($ctx, $cgi, $top) = @_;
+ require PublicInbox::Feed;
+ [ 200, [ 'Content-Type' => 'application/xml' ],
+ [ PublicInbox::Feed->generate({
+ git_dir => $ctx->{git_dir},
+ listname => $ctx->{listname},
+ pi_config => $pi_config,
+ cgi => $cgi,
+ top => $top,
+ }) ]
+ ];
+}
+
+# /$LISTNAME/?r=$GIT_COMMIT -> HTML only
+sub get_index {
+ my ($ctx, $cgi, $top) = @_;
+ require PublicInbox::Feed;
+ [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
+ [ PublicInbox::Feed->generate_html_index({
+ git_dir => $ctx->{git_dir},
+ listname => $ctx->{listname},
+ pi_config => $pi_config,
+ cgi => $cgi,
+ top => $top,
+ }) ]
+ ];
+}
+
+# just returns a string ref for the blob in the current ctx
+sub mid2blob {
+ my ($ctx) = @_;
+ require Digest::SHA;
+ my $hex = Digest::SHA::sha1_hex($ctx->{mid});
+ $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or
+ die "BUG: not a SHA-1 hex: $hex";
+
+ my @cmd = ('git', "--git-dir=$ctx->{git_dir}",
+ qw(cat-file blob), "HEAD:$1/$2");
+ my $cmd = join(' ', @cmd);
+ my $pid = open my $fh, '-|';
+ defined $pid or die "fork failed: $!\n";
+ if ($pid == 0) {
+ open STDERR, '>', '/dev/null'; # ignore errors
+ exec @cmd or die "exec failed: $!\n";
+ } else {
+ my $blob = eval { local $/; <$fh> };
+ close $fh;
+ $? == 0 ? \$blob : undef;
+ }
+}
+
+# /$LISTNAME/m/$MESSAGE_ID.txt -> raw original
+sub get_mid_txt {
+ my ($ctx, $cgi) = @_;
+ my $x = mid2blob($ctx);
+ $x ? [ 200, [ 'Content-Type' => 'text/plain' ], [ $$x ] ] : r404();
+}
+
+# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes)
+sub get_mid_html {
+ my ($ctx, $cgi) = @_;
+ my $x = mid2blob($ctx);
+ return r404() unless $x;
+
+ require PublicInbox::View;
+ my $mid_href = PublicInbox::Hval::ascii_html(
+ uri_escape_utf8($ctx->{mid}));
+ my $pfx = "../f/$mid_href.html";
+ require Email::MIME;
+ [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
+ [ PublicInbox::View->as_html(Email::MIME->new($$x), $pfx) ] ];
+}
+
+# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes)
+sub get_full_html {
+ my ($ctx, $cgi) = @_;
+ my $x = mid2blob($ctx);
+ return r404() unless $x;
+ require PublicInbox::View;
+ require Email::MIME;
+ [ 200, [ 'Content-Type' => 'text/html' ],
+ [ PublicInbox::View->as_html(Email::MIME->new($$x))] ];
+}
+
+sub self_url {
+ my ($cgi) = @_;
+ ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string;
+}
+
+sub redirect_list_index {
+ my ($ctx, $cgi) = @_;
+ do_redirect(self_url($cgi) . "/");
+}
+
+sub redirect_mid {
+ my ($ctx, $cgi) = @_;
+ my $url = self_url($cgi);
+ $url =~ s!/f/!/m/!;
+ do_redirect($url . '.html');
+}
+
+sub do_redirect {
+ my ($url) = @_;
+ [ 301,
+ [ Location => $url, 'Content-Type' => 'text/plain' ],
+ [ "Redirecting to $url\n" ]
+ ]
+}
+
+1;
#!/usr/bin/perl -w
# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-#
-# We focus on the lowest common denominators here:
-# - targeted at text-only console browsers (lynx, w3m, etc..)
-# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs
-# - No JavaScript, graphics or icons allowed.
-# - Must not rely on static content
-# - UTF-8 is only for user-content, 7-bit US-ASCII for us
-
-use 5.008;
use strict;
use warnings;
-use PublicInbox::Config;
-use URI::Escape qw(uri_escape_utf8 uri_unescape);
-our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
-our $NO_SCRIPT_NAME; # for prettier redirects with mod_perl2
-our $pi_config;
+require PublicInbox::WWW;
+use CGI qw/-nosticky/;
+our $NO_SCRIPT_NAME;
BEGIN {
- $pi_config = PublicInbox::Config->new;
- # TODO: detect and reload config as needed
$NO_SCRIPT_NAME = 1 if $ENV{NO_SCRIPT_NAME};
- if ($ENV{MOD_PERL}) {
- require CGI;
- no warnings;
- $CGI::NOSTICKY = 1;
- CGI->compile;
- }
-}
-
-if ($ENV{PI_PLACKUP}) {
- psgi_app();
-} else {
- # some servers (Ruby webrick) include scheme://host[:port] here,
- # which confuses CGI.pm when generating self_url.
- # RFC 3875 does not mention REQUEST_URI at all,
- # so nuke it since CGI.pm functions without it.
- require CGI;
- delete $ENV{REQUEST_URI};
- $ENV{SCRIPT_NAME} = '' if $NO_SCRIPT_NAME;
- my $req = CGI->new;
- my $ret = main($req, $req->request_method);
- binmode STDOUT;
- if (@ARGV && $ARGV[0] eq 'static') {
- print $ret->[2]->[0];
- } else { # CGI
- cgi_print($ret);
- }
-}
-
-# private functions below
-
-sub main {
- my ($cgi, $method) = @_;
- my %ctx;
- if ($method !~ /\AGET|HEAD\z/) {
- return r(405, 'Method Not Allowed');
- }
- my $path_info = $cgi->path_info;
-
- # top-level indices and feeds
- if ($path_info eq '/') {
- r404();
- } elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
- invalid_list(\%ctx, $1) || redirect_list_index(\%ctx, $cgi);
- } elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
- invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi, 0);
- } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) {
- invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi, 0);
-
- # single-message pages
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx, $cgi);
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx, $cgi);
-
- # full-message page
- } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx, $cgi);
-
- # convenience redirects, order matters
- } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || redirect_mid(\%ctx, $cgi);
-
- } else {
- r404();
- }
-}
-
-sub r404 { r(404, 'Not Found') }
-
-# simple response for errors
-sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
-
-# returns undef if valid, array ref response if invalid
-sub invalid_list {
- my ($ctx, $listname) = @_;
- my $git_dir = $pi_config->get($listname, "mainrepo");
- if (defined $git_dir) {
- $ctx->{git_dir} = $git_dir;
- $ctx->{listname} = $listname;
- return;
- }
- r404();
-}
-
-# returns undef if valid, array ref response if invalid
-sub invalid_list_mid {
- my ($ctx, $listname, $mid) = @_;
- my $ret = invalid_list($ctx, $listname, $mid);
- $ctx->{mid} = uri_unescape($mid) unless $ret;
- $ret;
-}
-
-# /$LISTNAME/atom.xml -> Atom feed, includes replies
-sub get_atom {
- my ($ctx, $cgi, $top) = @_;
- require PublicInbox::Feed;
- [ 200, [ 'Content-Type' => 'application/xml' ],
- [ PublicInbox::Feed->generate({
- git_dir => $ctx->{git_dir},
- listname => $ctx->{listname},
- pi_config => $pi_config,
- cgi => $cgi,
- top => $top,
- }) ]
- ];
-}
-
-# /$LISTNAME/?r=$GIT_COMMIT -> HTML only
-sub get_index {
- my ($ctx, $cgi, $top) = @_;
- require PublicInbox::Feed;
- [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::Feed->generate_html_index({
- git_dir => $ctx->{git_dir},
- listname => $ctx->{listname},
- pi_config => $pi_config,
- cgi => $cgi,
- top => $top,
- }) ]
- ];
-}
-
-# just returns a string ref for the blob in the current ctx
-sub mid2blob {
- my ($ctx) = @_;
- require Digest::SHA;
- my $hex = Digest::SHA::sha1_hex($ctx->{mid});
- $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or
- die "BUG: not a SHA-1 hex: $hex";
-
- my @cmd = ('git', "--git-dir=$ctx->{git_dir}",
- qw(cat-file blob), "HEAD:$1/$2");
- my $cmd = join(' ', @cmd);
- my $pid = open my $fh, '-|';
- defined $pid or die "fork failed: $!\n";
- if ($pid == 0) {
- open STDERR, '>', '/dev/null'; # ignore errors
- exec @cmd or die "exec failed: $!\n";
- } else {
- my $blob = eval { local $/; <$fh> };
- close $fh;
- $? == 0 ? \$blob : undef;
- }
-}
-
-# /$LISTNAME/m/$MESSAGE_ID.txt -> raw original
-sub get_mid_txt {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- $x ? [ 200, [ 'Content-Type' => 'text/plain' ], [ $$x ] ] : r404();
-}
-
-# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes)
-sub get_mid_html {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- return r404() unless $x;
-
- require PublicInbox::View;
- my $mid_href = PublicInbox::Hval::ascii_html(
- uri_escape_utf8($ctx->{mid}));
- my $pfx = "../f/$mid_href.html";
- require Email::MIME;
- [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::View->as_html(Email::MIME->new($$x), $pfx) ] ];
-}
-
-# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes)
-sub get_full_html {
- my ($ctx, $cgi) = @_;
- my $x = mid2blob($ctx);
- return r404() unless $x;
- require PublicInbox::View;
- require Email::MIME;
- [ 200, [ 'Content-Type' => 'text/html' ],
- [ PublicInbox::View->as_html(Email::MIME->new($$x))] ];
-}
-
-sub self_url {
- my ($cgi) = @_;
- ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string;
-}
-
-sub redirect_list_index {
- my ($ctx, $cgi) = @_;
- do_redirect(self_url($cgi) . "/");
-}
-
-sub redirect_mid {
- my ($ctx, $cgi) = @_;
- my $url = self_url($cgi);
- $url =~ s!/f/!/m/!;
- do_redirect($url . '.html');
-}
-
-sub do_redirect {
- my ($url) = @_;
- [ 301,
- [ Location => $url, 'Content-Type' => 'text/plain' ],
- [ "Redirecting to $url\n" ]
- ]
-}
-
-sub psgi_app {
- # preload so we are CoW friendly
- require PublicInbox::Feed;
- require PublicInbox::View;
- require Mail::Thread;
- require Digest::SHA;
- require POSIX;
- require XML::Atom::SimpleFeed;
- require Plack::Request;
- sub {
- my $req = Plack::Request->new(@_);
- main($req, $req->method);
- };
-}
-
-sub cgi_print {
- my ($ret) = @_;
+ CGI->compile if $ENV{MOD_PERL};
+}
+
+# some servers (Ruby webrick) include scheme://host[:port] here,
+# which confuses CGI.pm when generating self_url.
+# RFC 3875 does not mention REQUEST_URI at all,
+# so nuke it since CGI.pm functions without it.
+delete $ENV{REQUEST_URI};
+$ENV{SCRIPT_NAME} = '' if $NO_SCRIPT_NAME;
+my $req = CGI->new;
+my $ret = PublicInbox::WWW::run($req, $req->request_method);
+binmode STDOUT;
+if (@ARGV && $ARGV[0] eq 'static') {
+ print $ret->[2]->[0]; # only show the body
+} else { # CGI
my ($status, $headers, $body) = @$ret;
my %codes = (
200 => 'OK',
--- /dev/null
+# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use File::Temp qw/tempdir/;
+use Cwd;
+use IPC::Run qw/run/;
+my $psgi = "examples/public-inbox.psgi";
+my $mda = "blib/script/public-inbox-mda";
+my $tmpdir = tempdir(CLEANUP => 1);
+my $home = "$tmpdir/pi-home";
+my $pi_home = "$home/.public-inbox";
+my $pi_config = "$pi_home/config";
+my $maindir = "$tmpdir/main.git";
+my $main_bin = getcwd()."/t/main-bin";
+my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock
+my $addr = 'test-public@example.com';
+my $cfgpfx = "publicinbox.test";
+my $failbox = "$home/fail.mbox";
+local $ENV{PI_EMERGENCY} = $failbox;
+
+our $have_plack;
+eval {
+ require Plack::Request;
+ eval 'use Plack::Test; use HTTP::Request::Common';
+ $have_plack = 1;
+};
+SKIP: {
+ skip 'Plack not installed', 1 unless $have_plack;
+ ok(-f $psgi, "psgi example file found");
+ ok(-x "$main_bin/spamc",
+ "spamc ham mock found (run in top of source tree");
+ ok(-x $mda, "$mda is executable");
+ is(1, mkdir($home, 0755), "setup ~/ for testing");
+ is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox");
+ is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
+ open my $fh, '>', "$maindir/description" or die "open: $!\n";
+ print $fh "test for public-inbox\n";
+ close $fh or die "close: $!\n";
+ my %cfg = (
+ "$cfgpfx.address" => $addr,
+ "$cfgpfx.mainrepo" => $maindir,
+ );
+ while (my ($k,$v) = each %cfg) {
+ is(0, system(qw(git config --file), $pi_config, $k, $v),
+ "setup $k");
+ }
+
+ local $ENV{HOME} = $home;
+ local $ENV{ORIGINAL_RECIPIENT} = $addr;
+
+ # ensure successful message delivery
+ {
+ my $simple = Email::Simple->new(<<EOF);
+From: Me <me\@example.com>
+To: You <you\@example.com>
+Cc: $addr
+Message-Id: <blah\@example.com>
+Subject: hihi
+Date: Thu, 01 Jan 1970 00:00:00 +0000
+
+zzzzzz
+EOF
+ my $in = $simple->as_string;
+ run_with_env({PATH => $main_path}, [$mda], \$in);
+ local $ENV{GIT_DIR} = $maindir;
+ my $rev = `git rev-list HEAD`;
+ like($rev, qr/\A[a-f0-9]{40}/, "good revision committed");
+ }
+ my $app = require $psgi;
+
+ # redirect with trailing /
+ test_psgi($app, sub {
+ my ($cb) = @_;
+ my $from = 'http://example.com/test';
+ my $to = "$from/";
+ my $res = $cb->(GET($from));
+ is(301, $res->code, 'is permanent redirect');
+ is($to, $res->header('Location'), 'redirect location matches');
+ });
+
+ test_psgi($app, sub {
+ my ($cb) = @_;
+ my $atomurl = 'http://example.com/test/atom.xml';
+ my $res = $cb->(GET('http://example.com/test/'));
+ is(200, $res->code, 'success response received');
+ like($res->content, qr!href="\Q$atomurl\E"!,
+ 'atom URL generated');
+ like($res->content, qr!href="m/blah%40example\.com\.html"!,
+ 'index generated');
+ });
+
+ test_psgi($app, sub {
+ my ($cb) = @_;
+ my $pfx = 'http://example.com/test';
+ my $res = $cb->(GET($pfx . '/atom.xml'));
+ is(200, $res->code, 'success response received for atom');
+ like($res->content,
+ qr!link href="\Q$pfx\E/m/blah%40example\.com\.html"!,
+ 'atom feed generated correct URL');
+ });
+}
+
+done_testing();
+
+sub run_with_env {
+ my ($env, @args) = @_;
+ my $init = sub { foreach my $k (keys %$env) { $ENV{$k} = $env->{$k} } };
+ run(@args, init => $init);
+}