From: Eric Wong Date: Sat, 5 Apr 2014 06:53:19 +0000 (+0000) Subject: get a basic CGI feed sender running X-Git-Tag: v1.0.0~1312 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=e97da7bb0d230fd624e7f21464c6355a4fdad119 get a basic CGI feed sender running We should be able to wire up the rest, soon. --- diff --git a/Makefile.PL b/Makefile.PL index 5d3ee75c..6b2e16ff 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -8,14 +8,20 @@ WriteMakefile( VERSION => '0.0.0', AUTHOR => 'Eric Wong ', ABSTRACT => 'public-inbox.org infrastructure', - EXE_FILES => [qw/public-inbox-mda/], + EXE_FILES => [qw/public-inbox-mda public-inbox-cgi/], PREREQ_PM => { - # note: we use ssoma(1) and spamc(1), + # note: we use ssoma(1) and spamc(1), # NOT the Perl modules + # We also depend on git through ssoma. 'Email::MIME' => 0, 'Email::MIME::ContentType' => 0, 'Email::Filter' => 0, 'Email::Address' => 0, + 'Date::Parse' => 0, + 'Encode::MIME::Header' => 0, + 'XML::Atom::SimpleFeed' => 0, + # We have more test dependencies, but do not force + # users to install them }, ); diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index da877f3f..704effce 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -16,22 +16,23 @@ use constant DATEFMT => '%Y-%m-%dT%H:%M:%SZ'; our $dt_parser = DateTime::Format::Mail->new(loose => 1); # main function +# FIXME: takes too many args, cleanup sub generate { - my ($class, $git_dir, $max) = @_; + my ($class, $git_dir, $max, $pi_config, $listname, $cgi, $top) = @_; $max ||= 25; local $ENV{GIT_DIR} = $git_dir; - my $feed_opts = get_feedopts(); + my $feed_opts = get_feedopts($pi_config, $listname, $cgi); my $feed = XML::Atom::SimpleFeed->new( - title => $feed_opts->{title}, + title => $feed_opts->{description} || "unnamed feed", link => $feed_opts->{url} || "http://example.com/", link => { rel => 'self', - href => $feed_opts->{atomUrl} || + href => $feed_opts->{atomurl} || "http://example.com/atom", }, - id => $feed_opts->{email} || 'public-inbox@example.com', + id => $feed_opts->{address} || 'public-inbox@example.com', updated => strftime(DATEFMT, gmtime), ); @@ -48,12 +49,13 @@ sub generate { if ($line =~ /^:000000 100644 0{40} ([a-f0-9]{40})/) { my $add = $1; next if $deleted{$add}; - $nr += add_to_feed($feed_opts, $feed, $add); + $nr += add_to_feed($feed_opts, $feed, $add, $top); last if $nr >= $max; } elsif ($line =~ /^:100644 000000 ([a-f0-9]{40}) 0{40}/) { $deleted{$1} = 1; } } + close $log; $feed->as_string; @@ -61,12 +63,22 @@ sub generate { # private functions below sub get_feedopts { + my ($pi_config, $listname, $cgi) = @_; my %rv; - foreach my $key (qw(title url atomUrl email)) { - my $tmp = `git config publicInboxFeed.$key`; - chomp $tmp; - $rv{$key} = $tmp; + if ($pi_config && defined $listname && length $listname) { + foreach my $key (qw(description address url atomurl midurl)) { + $rv{$key} = $pi_config->get($listname, $key); + } + } + if ($cgi) { + my $cgi_url = $cgi->self_url; + my $url_base = $cgi_url; + $url_base =~ s!/?(?:index|all)\.atom\.xml\z!!; + $rv{url} ||= "$url_base/"; + $rv{midurl} ||= "$url_base/mid/%s.html"; + $rv{atomurl} = $cgi_url; } + \%rv; } @@ -83,7 +95,7 @@ sub feed_date { # returns 0 (skipped) or 1 (added) sub add_to_feed { - my ($feed_opts, $feed, $add) = @_; + my ($feed_opts, $feed, $add, $top) = @_; # we can use git cat-file --batch if performance becomes a # problem, but I doubt it... @@ -91,10 +103,14 @@ sub add_to_feed { return 0 if $? != 0; my $mime = Email::MIME->new($str); + if ($top && $mime->header("In-Reply-To")) { + return 0; + } + my $content = msg_content($mime); defined($content) or return 0; - my $mid_url = $feed_opts->{mid_url} || "http://example.com/mid/%s"; + my $midurl = $feed_opts->{midurl} || "http://example.com/mid/%s.html"; my $mid = utf8_header($mime, "Message-ID") or return 0; $mid =~ s/\A\z//; @@ -110,7 +126,7 @@ sub add_to_feed { my $email = $from[0]->address; defined $email or $email = ""; - my $url = sprintf($mid_url, uri_escape($mid)); + my $url = sprintf($midurl, uri_escape($mid)); my $date = utf8_header($mime, "Date"); $date or return 0; $date = feed_date($date) or return 0; diff --git a/public-inbox-cgi b/public-inbox-cgi new file mode 100755 index 00000000..cfcf3feb --- /dev/null +++ b/public-inbox-cgi @@ -0,0 +1,92 @@ +#!/usr/bin/perl -w +# Copyright (C) 2014, Eric Wong and all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# +# We focus on the lowest common denominators here: +# - targeted at text-only console browsers (lynx, w3m, etc..) +# - Only basic HTML, CSS only for line-wrapping
 text content for GUIs
+# - No JavaScript, graphics or icons allowed.
+# - Must not rely on static content
+# - UTF-8 is only for user-content, 7-bit US-ASCII for us
+
+use 5.008;
+use strict;
+use warnings;
+use CGI qw(:cgi :escapeHTML -nosticky); # PSGI/FastCGI/mod_perl compat
+use CGI::Util qw(unescape);
+use Encode;
+use PublicInbox::Config;
+our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
+our $pi_config;
+BEGIN {
+	$pi_config = PublicInbox::Config->new;
+	# TODO: detect and reload config as needed
+	if ($ENV{MOD_PERL}) {
+		CGI->compile;
+	}
+}
+
+sub main {
+	my $cgi = CGI->new;
+	if ($cgi->request_method !~ /\AGET|HEAD\z/) {
+		return r($cgi, "405 Method Not Allowed");
+	}
+	my $path_info = decode_utf8($ENV{PATH_INFO});
+	if ($path_info eq "/") {
+		r($cgi, "404 Not Found");
+	} elsif ($path_info =~ m!$LISTNAME_RE/?\z!o) {
+		get_list_log($cgi, $1);
+	} elsif ($path_info =~ m!$LISTNAME_RE/all\z!o) {
+		get_list_all($cgi, $1);
+	} elsif ($path_info =~ m!$LISTNAME_RE/index\.atom\.xml\z!o) {
+		get_atom_index($cgi, $1);
+	} elsif ($path_info =~ m!$LISTNAME_RE/all\.atom\.xml\z!o) {
+		get_atom_all($cgi, $1);
+	} elsif ($path_info =~ m!$LISTNAME_RE/mid/(\S+)\.txt\z!o) {
+		get_mid_txt($cgi, $1, $2);
+	} elsif ($path_info =~ m!$LISTNAME_RE/mid/(\S+)\.html\z!o) {
+		get_mid_html($cgi, $1, $2);
+	} elsif ($path_info =~ m!$LISTNAME_RE/mid/(\S+)\z!o) {
+		redirect_mid_html($cgi, $1, $2);
+	} else {
+		r($cgi, "404 Not Found");
+	}
+}
+
+binmode STDOUT, ':utf8';
+main();
+
+# simple response for errors
+sub r {
+	print $_[0]->header(-type => "text/plain",
+				-status => $_[1],
+				-charset => 'utf-8');
+}
+
+# /$LISTNAME/all.atom.xml	-> Atom feed, includes replies
+sub get_atom_all {
+	my ($cgi, $listname) = @_;
+	my $git_dir = $pi_config->get($listname, "mainrepo");
+	defined $git_dir or return r($cgi, "404 Not Found");
+
+	require PublicInbox::Feed;
+	print $cgi->header(-type => "application/xml", -charset => 'us-ascii',
+				-status => '200 OK');
+
+	print PublicInbox::Feed->generate($git_dir, undef,
+					$pi_config, $listname, $cgi);
+}
+
+# /$LISTNAME/index.atom.xml	-> Atom feed
+sub get_atom_index {
+	my ($cgi, $listname) = @_;
+	my $git_dir = $pi_config->get($listname, "mainrepo");
+	defined $git_dir or return r($cgi, "404 Not Found");
+
+	require PublicInbox::Feed;
+	print $cgi->header(-type => "application/xml", -charset => 'us-ascii',
+				-status => '200 OK');
+
+	print PublicInbox::Feed->generate($git_dir, undef,
+					$pi_config, $listname, $cgi, 1);
+}
diff --git a/t/cgi.t b/t/cgi.t
new file mode 100644
index 00000000..f359cf6e
--- /dev/null
+++ b/t/cgi.t
@@ -0,0 +1,153 @@
+# Copyright (C) 2014, Eric Wong  and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use File::Temp qw/tempdir/;
+use Cwd;
+use IPC::Run qw/run/;
+
+use constant CGI => "blib/script/public-inbox-cgi";
+my $mda = "blib/script/public-inbox-mda";
+my $tmpdir = tempdir(CLEANUP => 1);
+my $home = "$tmpdir/pi-home";
+my $pi_home = "$home/.public-inbox";
+my $pi_config = "$pi_home/config";
+my $maindir = "$tmpdir/main.git";
+my $main_bin = getcwd()."/t/main-bin";
+my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock
+my $addr = 'test-public@example.com';
+my $cfgpfx = "publicinbox.test";
+
+{
+	ok(-x "$main_bin/spamc",
+		"spamc ham mock found (run in top of source tree");
+	ok(-x $mda, "$mda is executable");
+	is(1, mkdir($home, 0755), "setup ~/ for testing");
+	is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox");
+	is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
+
+	my %cfg = (
+		"$cfgpfx.address" => $addr,
+		"$cfgpfx.mainrepo" => $maindir,
+		"$cfgpfx.description" => 'test for public-inbox',
+	);
+	while (my ($k,$v) = each %cfg) {
+		is(0, system(qw(git config --file), $pi_config, $k, $v),
+			"setup $k");
+	}
+}
+
+{
+	my $failbox = "$home/fail.mbox";
+	local $ENV{PI_FAILBOX} = $failbox;
+	local $ENV{HOME} = $home;
+	local $ENV{RECIPIENT} = $addr;
+
+	# ensure successful message delivery
+	{
+		my $simple = Email::Simple->new(<
+To: You 
+Cc: $addr
+Message-Id: 
+Subject: hihi
+Date: Thu, 01 Jan 1970 00:00:00 +0000
+
+zzzzzz
+EOF
+		my $in = $simple->as_string;
+		run_with_env({PATH => $main_path}, [$mda], \$in);
+		local $ENV{GIT_DIR} = $maindir;
+		my $rev = `git rev-list HEAD`;
+		like($rev, qr/\A[a-f0-9]{40}/, "good revision committed");
+	}
+
+	# deliver a reply, too
+	{
+		my $reply = Email::Simple->new(<
+To: Me 
+Cc: $addr
+In-Reply-To: 
+Message-Id: 
+Subject: Re: hihi
+Date: Thu, 01 Jan 1970 00:00:01 +0000
+
+Me wrote:
+> zzzzzz
+
+what?
+EOF
+		my $in = $reply->as_string;
+		run_with_env({PATH => $main_path}, [$mda], \$in);
+		local $ENV{GIT_DIR} = $maindir;
+		my $rev = `git rev-list HEAD`;
+		like($rev, qr/\A[a-f0-9]{40}/, "good revision committed");
+	}
+
+}
+
+# obvious failures, first
+{
+	local $ENV{HOME} = $home;
+	my $res = cgi_run("/", "", "PUT");
+	like($res->{head}, qr/Status:\s*405/i, "PUT not allowed");
+
+	$res = cgi_run("/");
+	like($res->{head}, qr/Status:\s*404/i, "index returns 404");
+}
+
+# atom feeds
+{
+	local $ENV{HOME} = $home;
+	my $res = cgi_run("/test/all.atom.xml");
+	like($res->{body}, qr/test for public-inbox/,
+		"set title in XML feed");
+	like($res->{body},
+		qr!http://test\.example\.com/test/mid/blah%40example\.com!,
+		"link id set");
+	like($res->{body}, qr/what\?/, "reply included");
+
+	$res = cgi_run("/test/index.atom.xml");
+	unlike($res->{body}, qr/what\?/, "reply not included in index");
+}
+
+# indices
+{
+	local $ENV{HOME} = $home;
+	my $res = cgi_run("/test/all.atom.xml");
+	like($res->{body}, qr/<title>test for public-inbox/,
+		"set title in XML feed");
+	like($res->{body},
+		qr!http://test\.example\.com/test/mid/blah%40example\.com!,
+		"link id set");
+	like($res->{body}, qr/what\?/, "reply included");
+
+	$res = cgi_run("/test/index.atom.xml");
+	unlike($res->{body}, qr/what\?/, "reply not included in index");
+}
+
+done_testing();
+
+sub run_with_env {
+	my ($env, @args) = @_;
+	my $init = sub { foreach my $k (keys %$env) { $ENV{$k} = $env->{$k} } };
+	run(@args, init => $init);
+}
+
+sub cgi_run {
+	my %env = (
+		PATH_INFO => $_[0],
+		QUERY_STRING => $_[1] || "",
+		REQUEST_METHOD => $_[2] || "GET",
+		GATEWAY_INTERFACE => 'CGI/1.1',
+		HTTP_ACCEPT => '*/*',
+		HTTP_HOST => 'test.example.com',
+	);
+	my ($in, $out, $err) = ("", "", "");
+	my $rc = run_with_env(\%env, [CGI], \$in, \$out, \$err);
+	my ($head, $body) = split(/\r\n\r\n/, $out, 2);
+	{ head => $head, body => $body, rc => $rc, err => $err }
+}