1 # Copyright (C) 2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
5 package PublicInbox::GzipFilter;
7 use parent qw(Exporter);
8 use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
9 use PublicInbox::CompressNoop;
11 use PublicInbox::GitAsyncCat;
# Symbols that callers may import on request through Exporter.
13 our @EXPORT_OK = qw(gzf_maybe);
# Options handed to Compress::Raw::Zlib::Deflate->new.
# NOTE(review): per the Compress::Raw::Zlib documentation, adding 16 to
# WindowBits selects gzip framing and AppendOutput makes deflate/flush
# append to the output buffer -- confirm against the installed version.
14 my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
# Flat name/value list pushed onto a response header array when gzip is
# negotiated: "Vary: Accept-Encoding" and "Content-Encoding: gzip".
15 my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);
# Constructor: returns a new, empty hash-based object blessed into the
# class it was invoked on (so subclasses get instances of themselves).
sub new {
	my ($class) = @_;
	return bless {}, $class;
}
19 # for Qspawn if using $env->{'pi-httpd.async'}
# Takes the invocant and an output handle, and stores the handle in
# $self->{http_out}; other code in this file calls ->write on that slot.
21 my ($self, $http_out) = @_;
22 $self->{http_out} = $http_out;
# Arguments: the response header array ref and the request env hash ref.
27 my ($res_hdr, $env) = @_;
# Only negotiate gzip when the Accept-Encoding request header mentions it;
# a missing header is treated as the empty string.
28 if (($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/) {
# mark the response so Plack::Middleware::Deflater (if loaded) skips it
29 $env->{'plack.skip-deflater'} = 1;
# advertise gzip in the response headers
30 push @$res_hdr, @GZIP_HDRS;
# NOTE(review): presumably a pass-through stand-in for the deflate object,
# used when gzip was not negotiated -- confirm in PublicInbox::CompressNoop.
33 PublicInbox::CompressNoop::new();
# Exportable functional constructor: takes the same two arguments as
# gz_or_noop() (response header array ref, env hash ref) and returns a
# filter object of this package whose {gz} slot holds whatever
# gz_or_noop() chose for the request.
sub gzf_maybe ($$) {
	my $self = { gz => gz_or_noop(@_) };
	return bless $self, __PACKAGE__;
}
# Arguments: status code, response header array ref, a callback used to
# step through the response ($next_cb) and a per-message callback ($eml_cb).
40 my ($self, $code, $res_hdr, $next_cb, $eml_cb) = @_;
41 my $env = $self->{env};
# reuse an existing compressor, or pick one based on the request
42 $self->{gz} //= gz_or_noop($res_hdr, $env);
43 if ($env->{'pi-httpd.async'}) {
# stash both callbacks on $self; async_blob_cb reads them back later
44 $self->{async_next} = $next_cb;
45 $self->{async_eml} = $eml_cb;
46 my $http = $env->{'psgix.io'}; # PublicInbox::HTTP
# register this filter on the client object
47 $http->{forward} = $self;
49 my ($wcb) = @_; # -httpd provided write callback
# hand status + headers to the write callback; whatever it returns
# becomes the output handle used by later ->write calls
50 $self->{http_out} = $wcb->([$code, $res_hdr]);
51 $next_cb->($http); # start stepping
53 } else { # generic PSGI code path
# three-element response with $self acting as the body object
54 [ $code, $res_hdr, $self ];
# Arguments: the response header array ref and the request env hash ref.
59 my ($res_hdr, $env) = @_;
# bail out unless the client's Accept-Encoding mentions gzip
60 return if ($env->{HTTP_ACCEPT_ENCODING} // '') !~ /\bgzip\b/;
# flatten the header name/value list into one newline-separated string so
# the checks below can match "Name\nvalue" pairs
61 my $hdr = join("\n", @$res_hdr);
# only text/html, text/plain and application/atom+xml bodies qualify
62 return if $hdr !~ m!^Content-Type\n
63 (?:(?:text/(?:html|plain))|
64 application/atom\+xml)\b!ixsm;
# skip responses that are already gzipped, that declare a numeric
# Content-Length, or that carry any Transfer-Encoding header
65 return if $hdr =~ m!^Content-Encoding\ngzip\n!smi;
66 return if $hdr =~ m!^Content-Length\n[0-9]+\n!smi;
67 return if $hdr =~ m!^Transfer-Encoding\n!smi;
68 # in case Plack::Middleware::Deflater is loaded:
# post-increment: returns early if the flag was already set, and sets it
# as a side effect when it was not
69 return if $env->{'plack.skip-deflater'}++;
# advertise gzip in the response headers
70 push @$res_hdr, @GZIP_HDRS;
# new, empty filter object of this package
71 bless {}, __PACKAGE__;
# Create a deflate context configured by %OPT; in list context ->new
# returns both the object and a status code.
75 my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
# any status other than Z_OK is fatal
76 $err == Z_OK or die "Deflate->new failed: $err";
80 # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
81 # Also used for ->getline callbacks
# $_[1] is read in place (not copied); undef input signals end-of-stream.
83 my $self = $_[0]; # $_[1] => input
85 # allocate the zlib context lazily here, instead of in ->new.
86 # Deflate contexts are memory-intensive and this object may
87 # be sitting in the Qspawn limiter queue for a while.
88 my $gz = $self->{gz} //= gzip_or_die();
# take any previously buffered compressed output out of $self
89 my $zbuf = delete($self->{zbuf});
90 if (defined $_[1]) { # my $buf = $_[1];
# compress the input, collecting the output in $zbuf
91 my $err = $gz->deflate($_[1], $zbuf);
92 die "gzip->deflate: $err" if $err != Z_OK;
# hand compressed data back only once $zbuf has reached a length of 8192
93 return $zbuf if length($zbuf) >= 8192;
# otherwise keep it buffered on $self for the next call
95 $self->{zbuf} = $zbuf;
97 } else { # undef == EOF
# finish the stream (Z_FINISH), collecting the remaining output in $zbuf
98 my $err = $gz->flush($zbuf, Z_FINISH);
99 die "gzip->flush: $err" if $err != Z_OK;
105 # my $ret = bytes::length($_[1]); # XXX does anybody care?
# Run the input ($_[1]) through translate() and pass the result to the
# ->write method of the stored output handle; @_ is used directly so the
# input is not copied.
106 $_[0]->{http_out}->write(translate($_[0], $_[1]));
109 # similar to ->translate; use this when we're sure we know we have
110 # more data to buffer after this
112 my $self = $_[0]; # $_[1] => input
# Compress the input into $self->{zbuf}. Note that $self->{gz} is used
# directly here: there is no lazy allocation, so it must already be set.
113 my $err = $self->{gz}->deflate($_[1], $self->{zbuf});
114 die "gzip->deflate: $err" if $err != Z_OK;
118 # flushes and returns the final bit of gzipped data
120 my $self = $_[0]; # $_[1] => final input (optional)
# detach both the pending output buffer and the compressor from $self;
# after this the object no longer holds a {gz} or {zbuf}
121 my $zbuf = delete $self->{zbuf};
122 my $gz = delete $self->{gz};
# compress the final input into $zbuf
125 $err = $gz->deflate($_[1], $zbuf);
126 die "gzip->deflate: $err" if $err != Z_OK;
# finish the stream (Z_FINISH), collecting the remaining output in $zbuf
128 $err = $gz->flush($zbuf, Z_FINISH);
129 die "gzip->flush: $err" if $err != Z_OK;
# If an output handle is still attached, detach it from $self and write
# the final data returned by zflush() to it; otherwise do nothing here.
135 if (my $http_out = delete $self->{http_out}) {
136 $http_out->write(zflush($self));
141 # this is public-inbox-httpd-specific
# Arguments: a reference to the blob content, the object id, its type and
# size, and the filter object ($self) passed through as the extra argument.
142 sub async_blob_cb { # git->cat_async callback
143 my ($bref, $oid, $type, $size, $self) = @_;
144 my $http = $self->{env}->{'psgix.io'} or return; # client abort
# the message being fetched must have been recorded on $self beforehand
145 my $smsg = $self->{smsg} or die 'BUG: no smsg';
146 if (!defined($oid)) {
147 # it's possible to have TOCTOU if an admin runs
148 # public-inbox-(edit|purge), just move onto the next message
149 return $http->next_step($self->{async_next});
# sanity check: the blob received must be the one $smsg refers to
151 $smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
# build a PublicInbox::Eml from the blob and hand it to the stored
# per-message callback
152 $self->{async_eml}->($self, PublicInbox::Eml->new($bref));
# then schedule the stored stepping callback on the client object
153 $http->next_step($self->{async_next});
# Arguments: the filter object and a message object carrying a {blob} id.
157 my ($self, $smsg) = @_;
# Request the blob from the inbox's git object via git_async_cat;
# async_blob_cb is the callback and $self is passed along as its argument.
158 git_async_cat($self->{-inbox}->git, $smsg->{blob},
159 \&async_blob_cb, $self);