1 # Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # In public-inbox <=1.5.0, public-inbox-httpd favored "getline"
5 # response bodies to take a "pull"-based approach to feeding
6 # slow clients (as opposed to a more common "push" model).
8 # In newer versions, public-inbox-httpd supports a backpressure-aware
9 # pull/push model which also accounts for slow git blob storage.
10 # async_next callbacks only run when the DS {wbuf} is drained
11 # async_eml callbacks only run when a blob arrives from git.
13 # We continue to support getline+close for generic PSGI servers.
14 package PublicInbox::GzipFilter;
16 use parent qw(Exporter);
17 use Compress::Raw::Zlib qw(Z_OK);
18 use PublicInbox::CompressNoop;
20 use PublicInbox::GitAsyncCat;
22 our @EXPORT_OK = qw(gzf_maybe);
23 my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
24 my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);
# Constructor: blesses a fresh, empty hash into the invoking class (the
# first argument).  No fields are initialized here; other code in this
# file populates fields such as {gz} and {http_out} later, on demand.
26 sub new { bless {}, shift } # qspawn filter
28 # for Qspawn if using $env->{'pi-httpd.async'}
30 my ($self, $http_out) = @_;
31 $self->{http_out} = $http_out; # PublicInbox::HTTP::{Chunked,Identity}
36 my ($res_hdr, $env) = @_;
37 if (($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/) {
38 $env->{'plack.skip-deflater'} = 1;
39 push @$res_hdr, @GZIP_HDRS;
42 PublicInbox::CompressNoop::new();
# Exportable on request (listed in @EXPORT_OK).  Forwards its arguments
# unchanged to gz_or_noop() and stores the result in the {gz} field of a
# new hash blessed into this package.
# The arguments are presumably ($res_hdr, $env), matching the
# gz_or_noop($res_hdr, $env) call elsewhere in this file -- confirm.
# The ($$) prototype affects how call sites that see it are parsed (two
# scalar arguments); it does not validate the arguments at run time.
46 sub gzf_maybe ($$) { bless { gz => gz_or_noop(@_) }, __PACKAGE__ }
49 my ($self, $code, $res_hdr) = @_;
50 my $env = $self->{env};
51 $self->{gz} //= gz_or_noop($res_hdr, $env);
52 if ($env->{'pi-httpd.async'}) {
53 my $http = $env->{'psgix.io'}; # PublicInbox::HTTP
54 $http->{forward} = $self;
56 my ($wcb) = @_; # -httpd provided write callback
57 $self->{http_out} = $wcb->([$code, $res_hdr]);
58 $self->can('async_next')->($http); # start stepping
60 } else { # generic PSGI code path
61 [ $code, $res_hdr, $self ];
66 my ($res_hdr, $env) = @_;
67 return if ($env->{HTTP_ACCEPT_ENCODING} // '') !~ /\bgzip\b/;
68 my $hdr = join("\n", @$res_hdr);
69 return if $hdr !~ m!^Content-Type\n
70 (?:(?:text/(?:html|plain))|
71 application/atom\+xml)\b!ixsm;
72 return if $hdr =~ m!^Content-Encoding\ngzip\n!smi;
73 return if $hdr =~ m!^Content-Length\n[0-9]+\n!smi;
74 return if $hdr =~ m!^Transfer-Encoding\n!smi;
75 # in case Plack::Middleware::Deflater is loaded:
76 return if $env->{'plack.skip-deflater'}++;
77 push @$res_hdr, @GZIP_HDRS;
78 bless {}, __PACKAGE__;
82 my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
83 $err == Z_OK or die "Deflate->new failed: $err";
# Emits a warning that some component "went away unexpectedly".
#   $ctx  - hashref; $ctx->{ibx}->{name} is interpolated into the message
#   $what - label naming the component (search/over/mm per the note below)
# The message ends in "\n", so Perl does not append file/line information.
# NOTE(review): the end of this sub is not visible in this excerpt, so its
# return value is intentionally left undocumented here.
87 sub gone { # what: search/over/mm
88 my ($ctx, $what) = @_;
89 warn "W: `$ctx->{ibx}->{name}' $what went away unexpectedly\n";
93 # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
94 # Also used for ->getline callbacks
96 my $self = $_[0]; # $_[1] => input
98 # allocate the zlib context lazily here, instead of in ->new.
99 # Deflate contexts are memory-intensive and this object may
100 # be sitting in the Qspawn limiter queue for a while.
101 my $gz = $self->{gz} //= gzip_or_die();
102 my $zbuf = delete($self->{zbuf});
103 if (defined $_[1]) { # my $buf = $_[1];
104 my $err = $gz->deflate($_[1], $zbuf);
105 die "gzip->deflate: $err" if $err != Z_OK;
106 return $zbuf if length($zbuf) >= 8192;
108 $self->{zbuf} = $zbuf;
110 } else { # undef == EOF
111 my $err = $gz->flush($zbuf);
112 die "gzip->flush: $err" if $err != Z_OK;
118 # my $ret = bytes::length($_[1]); # XXX does anybody care?
119 $_[0]->{http_out}->write(translate($_[0], $_[1]));
122 # similar to ->translate; use this when we know for certain that
123 # more data will be buffered after this call
125 my $self = $_[0]; # $_[1] => input
126 my $err = $self->{gz}->deflate($_[1], $self->{zbuf});
127 die "gzip->deflate: $err" if $err != Z_OK;
131 # flushes and returns the final bit of gzipped data
133 my $self = $_[0]; # $_[1] => final input (optional)
134 my $zbuf = delete $self->{zbuf};
135 my $gz = delete $self->{gz};
138 $err = $gz->deflate($_[1], $zbuf);
139 die "gzip->deflate: $err" if $err != Z_OK;
141 $err = $gz->flush($zbuf);
142 die "gzip->flush: $err" if $err != Z_OK;
148 if (my $http_out = delete $self->{http_out}) {
149 $http_out->write(zflush($self));
156 if (my $env = $self->{env}) {
157 eval { $env->{'psgi.errors'}->print(@_, "\n") };
158 warn("E: error printing to psgi.errors: $@", @_) if $@;
159 my $http = $env->{'psgix.io'} or return; # client abort
160 eval { $http->close }; # should hit our close
161 warn "E: error in http->close: $@" if $@;
162 eval { $self->close }; # just in case...
163 warn "E: error in self->close: $@" if $@;
169 # this is public-inbox-httpd-specific
# Callback passed by reference to ibx_async_cat() elsewhere in this file.
#   $bref  - handed to PublicInbox::Eml->new; presumably a reference to the
#            raw blob contents -- confirm against the git cat_async API
#   $oid   - object ID reported for the blob; undef is treated as "missing"
#   $type, $size - accepted but not used in the visible code
#   $self  - presumably the same object passed as the trailing argument to
#            ibx_async_cat() -- confirm
# NOTE(review): several lines that close the blocks in this sub are not
# visible in this excerpt, so the exact nesting (in particular whether the
# final next_step call is conditional) cannot be determined from here.
170 sub async_blob_cb { # git->cat_async callback
171 my ($bref, $oid, $type, $size, $self) = @_;
172 my $http = $self->{env}->{'psgix.io'};
# {forward} is set to the filter object when an async response starts (see
# the "$http->{forward} = $self" assignment elsewhere in this file); a
# false value here is taken to mean the client is gone, so do nothing.
173 $http->{forward} or return; # client aborted
# The message being processed must already be recorded in {smsg}.
174 my $smsg = $self->{smsg} or bail($self, 'BUG: no smsg');
175 if (!defined($oid)) {
176 # it's possible to have TOCTOU if an admin runs
177 # public-inbox-(edit|purge), just move onto the next message
178 warn "E: $smsg->{blob} missing in $self->{ibx}->{inboxdir}\n";
# Skip this message: schedule the object's async_next method and return.
179 return $http->next_step($self->can('async_next'));
# Sanity check: the blob delivered must be the one recorded in $smsg.
181 $smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
# Parse the blob and hand it to ->async_eml; any exception it throws is
# trapped by the eval and reported through bail().
182 eval { $self->async_eml(PublicInbox::Eml->new($bref)) };
183 bail($self, "E: async_eml: $@") if $@;
# When {-low_prio} is set, append this object to the {-low_prio_q} array
# held by $self->{www}.  push returns the new element count, so
# PublicInbox::DS::requeue is called only when this push made the count 1.
184 if ($self->{-low_prio}) {
185 push(@{$self->{www}->{-low_prio_q}}, $self) == 1 and
186 PublicInbox::DS::requeue($self->{www});
# Schedule the object's async_next method as the next step on $http.
188 $http->next_step($self->can('async_next'));
193 my ($self, $smsg) = @_;
194 ibx_async_cat($self->{ibx}, $smsg->{blob}, \&async_blob_cb, $self);