lib/PublicInbox/Mbox.pm | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++--- lib/PublicInbox/WWW.pm | 8 ++++++-- t/cgi.t | 11 ++++++++--- diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index fc9df1adcbc8b6f1c5ffb1393e61552faea6d714..cb9d65d1a9fb069b498ee4fe2161824aebb6fe51 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -11,8 +11,7 @@ sub thread_mbox { my ($ctx, $srch) = @_; sub { my ($response) = @_; # Plack callback - my $w = $response->([200, ['Content-Type' => 'text/plain']]); - emit_mbox($w, $ctx, $srch); + emit_mbox($response, $ctx, $srch); } } @@ -40,7 +39,14 @@ $fh->write($buf); } sub emit_mbox { - my ($fh, $ctx, $srch) = @_; + my ($response, $ctx, $srch) = @_; + eval { require IO::Compress::Gzip }; + return need_gzip($response) if $@; + + # http://www.iana.org/assignments/media-types/application/gzip + # http://www.iana.org/assignments/media-types/application/mbox + my $fh = $response->([200, ['Content-Type' => 'application/gzip']]); + $fh = PublicInbox::MboxGz->new($fh); require PublicInbox::GitCatFile; require Email::Simple; @@ -62,6 +68,58 @@ } $opts{offset} += $nr; } while ($nr > 0); + + $fh->close; +} + +sub need_gzip { + my $fh = $_[0]->([501, ['Content-Type' => 'text/html']]); + my $title = 'gzipped mbox not available'; + $fh->write(<$title
$title
+The administrator needs to install the IO::Compress::Gzip Perl module
+to support gzipped mboxes.
+Return to index
+EOF +} + +1; + +# fh may not be a proper IO, so we wrap the write and close methods +# to prevent IO::Compress::Gzip from complaining +package PublicInbox::MboxGz; +use strict; +use warnings; +use fields qw(gz fh buf); + +sub new { + my ($class, $fh) = @_; + my $self = fields::new($class); + my $buf; + $self->{buf} = \$buf; + $self->{gz} = IO::Compress::Gzip->new(\$buf); + $self->{fh} = $fh; + $self; +} + +sub _flush_buf { + my ($self) = @_; + if (defined ${$self->{buf}}) { + $self->{fh}->write(${$self->{buf}}); + ${$self->{buf}} = undef; + } +} + +sub write { + $_[0]->{gz}->write($_[1]); + _flush_buf($_[0]); +} + +sub close { + my ($self) = @_; + $self->{gz}->close; + _flush_buf($self); + # do not actually close $fh } 1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index cd8a57055cc2410835c52e87d74ca7b0a1fd9607..68839d7cee3ef3af33717f48b5d3aacda1c20c94 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -53,7 +53,8 @@ # thread display } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) { invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx, $cgi); - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox\z!o) { + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox\.gz!o) { + my $sfx = $3; invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $cgi); } elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) { @@ -329,7 +330,10 @@ my $href = PublicInbox::Hval::ascii_html(uri_escape_utf8($ctx->{mid})); "../f/$href.html"; } -# /$LISTNAME/t/$MESSAGE_ID.mbox -> search results as mbox +# /$LISTNAME/t/$MESSAGE_ID.mbox.gz -> search results as gzipped mbox +# note: I'm not a big fan of other compression formats since they're +# significantly more expensive on CPU than gzip and less-widely available, +# especially on older systems. Stick to zlib since that's what git uses. sub get_thread_mbox { my ($ctx, $cgi) = @_; my $srch = searcher($ctx) or return need_search($ctx); diff --git a/t/cgi.t b/t/cgi.t index 2747a1598bdc06d01ab923c31621b99fe165a661..e87f7dcae6df6d2bb54759c5fabc58df23eb3f5d 100644 --- a/t/cgi.t +++ b/t/cgi.t @@ -183,15 +183,20 @@ # retrieve thread as an mbox { local $ENV{HOME} = $home; local $ENV{PATH} = $main_path; - my $path = "/test/t/blahblah%40example.com.mbox"; + my $path = "/test/t/blahblah%40example.com.mbox.gz"; my $res = cgi_run($path); like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled"); my $indexed = system($index, $maindir) == 0; if ($indexed) { $res = cgi_run($path); - # use Data::Dumper; print STDERR Dumper($res); like($res->{head}, qr/^Status: 200 /, "search returned mbox"); - like($res->{body}, qr/^From /m, "From lines in mbox"); + eval { + require IO::Uncompress::Gunzip; + my $in = $res->{body}; + my $out; + IO::Uncompress::Gunzip::gunzip(\$in => \$out); + like($out, qr/^From /m, "From lines in mbox"); + }; } else { like($res->{head}, qr/^Status: 501 /, "search not available"); }