X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FGzipFilter.pm;h=a11ba73fa5b13ae5c680f9664f0f382a11466022;hb=refs%2Fheads%2Fmaster;hp=c621617107259ec05954b0eef5370c282f773166;hpb=96b0a14be7e62742ad06f0a37c3cba61fe6c51e7;p=public-inbox.git diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index c6216171..a11ba73f 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # # In public-inbox <=1.5.0, public-inbox-httpd favored "getline" @@ -46,18 +46,20 @@ sub gz_or_noop { sub gzf_maybe ($$) { bless { gz => gz_or_noop(@_) }, __PACKAGE__ } sub psgi_response { + # $code may be an HTTP response code (e.g. 200) or a CODE ref (mbox_hdr) my ($self, $code, $res_hdr) = @_; - my $env = $self->{env}; - $self->{gz} //= gz_or_noop($res_hdr, $env); - if ($env->{'pi-httpd.async'}) { - my $http = $env->{'psgix.io'}; # PublicInbox::HTTP + if ($self->{env}->{'pi-httpd.async'}) { + my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP $http->{forward} = $self; sub { my ($wcb) = @_; # -httpd provided write callback - $self->{http_out} = $wcb->([$code, $res_hdr]); + $self->{wcb_args} = [ $code, $res_hdr, $wcb ]; $self->can('async_next')->($http); # start stepping }; } else { # generic PSGI code path + ref($code) eq 'CODE' and + ($code, $res_hdr) = @{$code->($self)}; + $self->{gz} //= gz_or_noop($res_hdr, $self->{env}); [ $code, $res_hdr, $self ]; } } @@ -92,63 +94,81 @@ sub gone { # what: search/over/mm # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'} # Also used for ->getline callbacks -sub translate ($$) { - my $self = $_[0]; # $_[1] => input +sub translate { + my $self = shift; # $_[1] => input # allocate the zlib context lazily here, instead of in ->new. # Deflate contexts are memory-intensive and this object may # be sitting in the Qspawn limiter queue for a while. - my $gz = $self->{gz} //= gzip_or_die(); - my $zbuf = delete($self->{zbuf}); - if (defined $_[1]) { # my $buf = $_[1]; - my $err = $gz->deflate($_[1], $zbuf); - die "gzip->deflate: $err" if $err != Z_OK; - return $zbuf if length($zbuf) >= 8192; - - $self->{zbuf} = $zbuf; - ''; + $self->{gz} //= gzip_or_die(); + if (defined $_[0]) { # my $buf = $_[1]; + zmore($self, @_); + length($self->{zbuf}) >= 8192 ? delete($self->{zbuf}) : ''; } else { # undef == EOF - my $err = $gz->flush($zbuf); - die "gzip->flush: $err" if $err != Z_OK; - $zbuf; + zflush($self); } } +# returns PublicInbox::HTTP::{Chunked,Identity} +sub http_out ($) { + my ($self) = @_; + $self->{http_out} // do { + my $args = delete $self->{wcb_args} // return undef; + my $wcb = pop @$args; # from PublicInbox:HTTP async + # $args->[0] may be \&mbox_hdr or similar + $args = $args->[0]->($self) if ref($args->[0]) eq 'CODE'; + $self->{gz} //= gz_or_noop($args->[1], $self->{env}); + $self->{http_out} = $wcb->($args); # $wcb->([$code, $hdr_ary]) + }; +} + sub write { + my $self = shift; # my $ret = bytes::length($_[1]); # XXX does anybody care? - $_[0]->{http_out}->write(translate($_[0], $_[1])); + http_out($self)->write($self->translate(@_)); +} + +sub zfh { + $_[0]->{zfh} // do { + open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or + die "open: $!"; + $_[0]->{zfh} + }; } # similar to ->translate; use this when we're sure we know we have # more data to buffer after this sub zmore { - my $self = $_[0]; # $_[1] => input - my $err = $self->{gz}->deflate($_[1], $self->{zbuf}); - die "gzip->deflate: $err" if $err != Z_OK; - undef; + my $self = shift; + my $zfh = delete $self->{zfh}; + if (@_ > 1 || $zfh) { + print { $zfh // zfh($self) } @_; + @_ = (delete $self->{pbuf}); + delete $self->{zfh}; + }; + http_out($self); + my $err; + ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or + die "gzip->deflate: $err"; } # flushes and returns the final bit of gzipped data -sub zflush ($;$) { - my $self = $_[0]; # $_[1] => final input (optional) - my $zbuf = delete $self->{zbuf}; - my $gz = delete $self->{gz}; +sub zflush ($;@) { + my $self = shift; # $_[1..Inf] => final input (optional) + zmore($self, @_) if scalar(@_) || $self->{zfh}; + # not a bug, recursing on DS->write failure + my $gz = delete $self->{gz} // return ''; my $err; - if (defined $_[1]) { - $err = $gz->deflate($_[1], $zbuf); - die "gzip->deflate: $err" if $err != Z_OK; - } - $err = $gz->flush($zbuf); - die "gzip->flush: $err" if $err != Z_OK; + my $zbuf = delete $self->{zbuf}; + ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err"; $zbuf; } sub close { my ($self) = @_; - if (my $http_out = delete $self->{http_out}) { - $http_out->write(zflush($self)); - $http_out->close; - } + my $http_out = http_out($self) // return; + $http_out->write($self->zflush); + (delete($self->{http_out}) // return)->close; } sub bail { @@ -168,7 +188,7 @@ sub bail { # this is public-inbox-httpd-specific sub async_blob_cb { # git->cat_async callback my ($bref, $oid, $type, $size, $self) = @_; - my $http = $self->{env}->{'psgix.io'}; + my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP $http->{forward} or return; # client aborted my $smsg = $self->{smsg} or bail($self, 'BUG: no smsg'); if (!defined($oid)) { @@ -180,7 +200,7 @@ sub async_blob_cb { # git->cat_async callback $smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid"); eval { $self->async_eml(PublicInbox::Eml->new($bref)) }; bail($self, "E: async_eml: $@") if $@; - if ($self->{-low_prio}) { + if ($self->{-low_prio}) { # run via PublicInbox::WWW::event_step push(@{$self->{www}->{-low_prio_q}}, $self) == 1 and PublicInbox::DS::requeue($self->{www}); } else {