From 7a42c9451400e20dcc6fbef21f88a68da16aa708 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 9 Apr 2014 01:59:06 +0000 Subject: [PATCH] preliminary HTML index generation Using JWZ threading might work decently for this. Haven't checked in lynx, yet. --- Makefile.PL | 1 + lib/PublicInbox/Feed.pm | 81 ++++++++++++++++++++++++++++++++++++----- t/html_index.t | 60 ++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 t/html_index.t diff --git a/Makefile.PL b/Makefile.PL index 7dac4924..3e4f64d3 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -17,6 +17,7 @@ WriteMakefile( 'Email::MIME::ContentType' => 0, 'Email::Filter' => 0, 'Email::Address' => 0, + 'Mail::Thread' => '2.5', # 2.5+ needed for Email::Simple compat 'Date::Parse' => 0, 'Encode::MIME::Header' => 0, 'XML::Atom::SimpleFeed' => 0, diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index b77bbdfb..a6c1b9c1 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -14,6 +14,7 @@ use POSIX qw(strftime); use Date::Parse qw(strptime); use constant DATEFMT => '%Y-%m-%dT%H:%M:%SZ'; use PublicInbox::View; +use Mail::Thread; # main function sub generate { @@ -41,9 +42,39 @@ sub generate { add_to_feed($feed_opts, $feed, $add, $top); }); $feed->as_string; +} + +sub generate_html_index { + my ($class, $args) = @_; + my $max = $args->{max} || 50; + my $top = $args->{top}; # bool + local $ENV{GIT_DIR} = $args->{git_dir}; + my $feed_opts = get_feedopts($args); + my $title = escapeHTML($feed_opts->{description} || ""); + my @messages; + each_recent_blob($max, sub { + my $str = `git cat-file blob $_[0]`; + return 0 if $? != 0; + my $simple = Email::Simple->new($str); + $simple->body_set(""); # save some memory + push @messages, $simple; + 1; + }); + my $th = Mail::Thread->new(@messages); + $th->thread; + my @args = ( + "$title" . + '' . + '
');
+	push @args, $feed_opts->{midurl};
+	dump_html_line($_, 0, \@args) for $th->rootset;
+	$args[0] . '
'; } +# private subs + sub each_recent_blob { my ($max, $cb) = @_; @@ -78,25 +109,32 @@ sub get_feedopts { if ($pi_config && defined $listname && length $listname) { foreach my $key (qw(description address)) { - $rv{$key} = $pi_config->get($listname, $key); + $rv{$key} = $pi_config->get($listname, $key) || ""; } } + my $url_base; if ($cgi) { my $cgi_url = $cgi->self_url; - my $url_base = $cgi_url; + $url_base = $cgi_url; $url_base =~ s!/?(?:index|all)\.atom\.xml\z!!; - $rv{url} ||= "$url_base/"; - $rv{midurl} = "$url_base/mid/"; - $rv{fullurl} = "$url_base/full/"; $rv{atomurl} = $cgi_url; + } else { + $url_base = "http://example.com"; + $rv{atomurl} = "$url_base/index.atom.xml"; } + $rv{url} ||= "$url_base/"; + $rv{midurl} = "$url_base/mid/"; + $rv{fullurl} = "$url_base/full/"; \%rv; } sub utf8_header { - my ($mime, $name) = @_; - encode('utf8', decode('MIME-Header', $mime->header($name))); + my ($simple, $name) = @_; + my $val = $simple->header($name); + return "" unless defined $val; + $val =~ tr/\t\r\n / /s; + encode('utf8', decode('MIME-Header', $val)); } sub feed_date { @@ -127,11 +165,10 @@ sub add_to_feed { defined($content) or return 0; my $mid = utf8_header($mime, "Message-ID") or return 0; - $mid =~ s/\A\z//; + $mid =~ s/\A\z//; my $subject = utf8_header($mime, "Subject") || ""; - defined($subject) && length($subject) or return 0; + length($subject) or return 0; my $from = utf8_header($mime, "From") or return 0; @@ -156,4 +193,28 @@ sub add_to_feed { 1; } +sub dump_html_line { + my ($self, $level, $args) = @_; # args => [ $html, $midurl ] + $args->[0] .= (' ' x $level); + if ($self->message) { + my $simple = $self->message; + my $subj = utf8_header($simple, "Subject"); + my $mid = utf8_header($simple, "Message-ID"); + $mid =~ s/\A\z//; + my $url = $args->[1] . uri_escape($mid); + my $from = utf8_header($simple, "From"); + my @from = Email::Address->parse($from); + $from = $from[0]->name; + (defined($from) && length($from)) or $from = $from[0]->address; + $from = escapeHTML($from); + $subj = escapeHTML($subj); + $args->[0] .= "`-> $subj $from\n"; + } else { + $args->[0] .= "[ Message not available ]\n"; + } + dump_html_line($self->child, $level+1, $args) if $self->child; + dump_html_line($self->next, $level, $args) if $self->next; +} + 1; diff --git a/t/html_index.t b/t/html_index.t new file mode 100644 index 00000000..80a88997 --- /dev/null +++ b/t/html_index.t @@ -0,0 +1,60 @@ +# Copyright (C) 2014, Eric Wong and all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +use strict; +use warnings; +use Test::More; +use Email::Simple; +use PublicInbox::Feed; +use File::Temp qw/tempdir/; +my $tmpdir = tempdir(CLEANUP => 1); +my $git_dir = "$tmpdir/gittest"; + +# setup +{ + is(0, system(qw(git init -q --bare), $git_dir), "git init"); + my $prev = ""; + + foreach my $i (1..6) { + local $ENV{GIT_DIR} = $git_dir; + my $pid = open(my $pipe, "|-"); + defined $pid or die "fork/pipe failed: $!\n"; + if ($pid == 0) { + exec("ssoma-mda", $git_dir); + } + my $mid = "<$i\@example.com>"; + my $mid_line = "Message-ID: $mid\n"; + if ($prev) { + $mid_line .= "In-Reply-To: $prev\n"; + } + $prev = $mid; + my $simple = Email::Simple->new(< +To: U +$mid_line +Subject: zzz #$i +Date: Thu, 01 Jan 1970 00:00:00 +0000 + +> This is a long multi line quote so it should not be allowed to +> show up in its entirty in the Atom feed. drop me + +msg $i + +> inline me here, short quote + +keep me +EOF + print $pipe $simple->as_string or die "print failed: $!\n"; + close $pipe or die "close pipe failed: $!\n"; + } +} + +# check HTML index +{ + my $feed = PublicInbox::Feed->generate_html_index({ + git_dir => $git_dir, + max => 3 + }); + like($feed, qr/html/, "feed is valid HTML :)"); +} + +done_testing(); -- 2.44.0