X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FHTTP.pm;h=e65988bedf06fe2c5236e333bd7fc075021168b4;hb=c410bf707fb77b15698e14119dbf6e14e43479ba;hp=6576bf684931e6835ef9d07b4268e265a5ed8542;hpb=5fd049f32fde94f53f431ae3f37143440ee0cf09;p=public-inbox.git diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 6576bf68..e65988be 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -1,37 +1,41 @@ -# Copyright (C) 2016 all contributors +# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # # Generic PSGI server for convenience. It aims to provide # a consistent experience for public-inbox admins so they don't have # to learn different ways to admin both NNTP and HTTP components. -# There's nothing public-inbox-specific, here. +# There's nothing which depends on public-inbox, here. # Each instance of this class represents a HTTP client socket - +# +# fields: +# httpd: PublicInbox::HTTPD ref +# env: PSGI env hashref +# input_left: bytes left to read in request body (e.g. POST/PUT) +# remote_addr: remote IP address as a string (e.g. "127.0.0.1") +# remote_port: peer port +# forward: response body object, response to ->getline + ->close +# alive: HTTP keepalive state: +# 0: drop connection when done +# 1: keep connection when done +# 2: keep connection, chunk responses package PublicInbox::HTTP; use strict; -use warnings; -use base qw(Danga::Socket); -use fields qw(httpd env rbuf input_left remote_addr remote_port); +use parent qw(PublicInbox::DS); use Fcntl qw(:seek); use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl +use Plack::Util; use HTTP::Status qw(status_message); use HTTP::Date qw(time2str); -use IO::File; +use PublicInbox::DS qw(msg_more); +use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); +use PublicInbox::Tmpfile; use constant { CHUNK_START => -1, # [a-f0-9]+\r\n CHUNK_END => -2, # \r\n CHUNK_ZEND => -3, # \r\n CHUNK_MAX_HDR => 256, }; - -# FIXME: duplicated code with NNTP.pm -my $WEAKEN = {}; # string(inbox) -> inbox -my $WEAKTIMER; -sub weaken_task () { - $WEAKTIMER = undef; - $_->weaken_all for values %$WEAKEN; - $WEAKEN = {}; -} +use Errno qw(EAGAIN); # Use the same configuration parameter as git since this is primarily # a slow-client sponge for git-http-backend @@ -39,7 +43,7 @@ sub weaken_task () { our $MAX_REQUEST_BUFFER = $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} || (10 * 1024 * 1024); -my $null_io = IO::File->new('/dev/null', '<'); +open(my $null_io, '<', '/dev/null') or die "failed to open /dev/null: $!"; my $http_date; my $prev = 0; sub http_date () { @@ -49,72 +53,66 @@ sub http_date () { sub new ($$$) { my ($class, $sock, $addr, $httpd) = @_; - my $self = fields::new($class); - $self->SUPER::new($sock); - $self->{httpd} = $httpd; - $self->{rbuf} = ''; + my $self = bless { httpd => $httpd }, $class; + my $ev = EPOLLIN; + my $wbuf; + if ($sock->can('accept_SSL') && !$sock->accept_SSL) { + return CORE::close($sock) if $! != EAGAIN; + $ev = PublicInbox::TLS::epollbit() or return CORE::close($sock); + $wbuf = [ \&PublicInbox::DS::accept_tls_step ]; + } + $self->{wbuf} = $wbuf if $wbuf; ($self->{remote_addr}, $self->{remote_port}) = PublicInbox::Daemon::host_with_port($addr); - $self->watch_read(1); - $self; + $self->SUPER::new($sock, $ev | EPOLLONESHOT); } -sub event_read { # called by Danga::Socket +sub event_step { # called by PublicInbox::DS my ($self) = @_; - return event_read_input($self) if defined $self->{env}; + return unless $self->flush_write && $self->{sock}; - my $off = length($self->{rbuf}); - my $r = sysread($self->{sock}, $self->{rbuf}, 8192, $off); - if (defined $r) { - return $self->close if $r == 0; - return rbuf_process($self); - } - return if $!{EAGAIN}; # no need to call watch_read(1) again + # only read more requests if we've drained the write buffer, + # otherwise we can be buffering infinitely w/o backpressure - # common for clients to break connections without warning, - # would be too noisy to log here: - return $self->close; -} - -sub rbuf_process { - my ($self) = @_; + return read_input($self) if ref($self->{env}); + my $rbuf = $self->{rbuf} // (\(my $x = '')); my %env = %{$self->{httpd}->{env}}; # full hash copy - my $r = parse_http_request($self->{rbuf}, \%env); - - # We do not support Trailers in chunked requests, for now - # (they are rarely-used and git (as of 2.7.2) does not use them) - if ($r == -1 || $env{HTTP_TRAILER} || - # this length-check is necessary for PURE_PERL=1: - ($r == -2 && length($self->{rbuf}) > 0x4000)) { - return quit($self, 400); + my $r; + while (($r = parse_http_request($$rbuf, \%env)) < 0) { + # We do not support Trailers in chunked requests, for + # now (they are rarely-used and git (as of 2.7.2) does + # not use them). + # this length-check is necessary for PURE_PERL=1: + if ($r == -1 || $env{HTTP_TRAILER} || + ($r == -2 && length($$rbuf) > 0x4000)) { + return quit($self, 400); + } + $self->do_read($rbuf, 8192, length($$rbuf)) or return; } - return $self->watch_read(1) if $r < 0; # incomplete - $self->{rbuf} = substr($self->{rbuf}, $r); - - my $len = input_prepare($self, \%env); - defined $len or return write_err($self); # EMFILE/ENFILE + return quit($self, 400) if grep(/\s/, keys %env); # stop smugglers + $$rbuf = substr($$rbuf, $r); + my $len = input_prepare($self, \%env) // + return write_err($self, undef); # EMFILE/ENFILE - $len ? event_read_input($self) : app_dispatch($self); + $len ? read_input($self, $rbuf) : app_dispatch($self, undef, $rbuf); } -sub event_read_input ($) { - my ($self) = @_; +sub read_input ($;$) { + my ($self, $rbuf) = @_; + $rbuf //= $self->{rbuf} // (\(my $x = '')); my $env = $self->{env}; - return event_read_input_chunked($self) if env_chunked($env); + return read_input_chunked($self, $rbuf) if env_chunked($env); # env->{CONTENT_LENGTH} (identity) - my $sock = $self->{sock}; - my $len = $self->{input_left}; - $self->{input_left} = undef; - my $rbuf = \($self->{rbuf}); + my $len = delete $self->{input_left}; my $input = $env->{'psgi.input'}; while ($len > 0) { if ($$rbuf ne '') { - my $w = write_in_full($input, $rbuf, $len); - return write_err($self) unless $w; + my $w = syswrite($input, $$rbuf, $len); + return write_err($self, $len) unless $w; $len -= $w; die "BUG: $len < 0 (w=$w)" if $len < 0; if ($len == 0) { # next request may be pipelined @@ -123,30 +121,30 @@ sub event_read_input ($) { } $$rbuf = ''; } - my $r = sysread($sock, $$rbuf, 8192); - return recv_err($self, $r, $len) unless $r; + $self->do_read($rbuf, 8192) or return recv_err($self, $len); # continue looping if $r > 0; } - app_dispatch($self, $input); + app_dispatch($self, $input, $rbuf); } sub app_dispatch { - my ($self, $input) = @_; - $self->watch_read(0); + my ($self, $input, $rbuf) = @_; + $self->rbuf_idle($rbuf); my $env = $self->{env}; + $self->{env} = undef; # for exists() check in ->busy $env->{REMOTE_ADDR} = $self->{remote_addr}; $env->{REMOTE_PORT} = $self->{remote_port}; - if (my $host = $env->{HTTP_HOST}) { - $host =~ s/:(\d+)\z// and $env->{SERVER_PORT} = $1; + if (defined(my $host = $env->{HTTP_HOST})) { + $host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1; $env->{SERVER_NAME} = $host; } if (defined $input) { sysseek($input, 0, SEEK_SET) or die "BUG: psgi.input seek failed: $!"; } - # note: NOT $self->{sock}, we want our close (+ Danga::Socket::close), + # note: NOT $self->{sock}, we want our close (+ PublicInbox::DS::close), # to do proper cleanup: - $env->{'psgix.io'} = $self; # only for ->close + $env->{'psgix.io'} = $self; # for ->close or async_pass my $res = Plack::Util::run_app($self->{httpd}->{app}, $env); eval { if (ref($res) eq 'CODE') { @@ -155,7 +153,10 @@ sub app_dispatch { response_write($self, $env, $res); } }; - $self->close if $@; + if ($@) { + warn "response_write error: $@"; + $self->close; + } } sub response_header_write { @@ -180,195 +181,185 @@ sub response_header_write { my $conn = $env->{HTTP_CONNECTION} || ''; my $term = defined($len) || $chunked; - my $alive = $term && - (($proto eq 'HTTP/1.1' && $conn !~ /\bclose\b/i) || - ($conn =~ /\bkeep-alive\b/i)); - - $h .= 'Connection: ' . ($alive ? 'keep-alive' : 'close'); - $h .= "\r\nDate: " . http_date() . "\r\n\r\n"; + my $prot_persist = ($proto eq 'HTTP/1.1') && ($conn !~ /\bclose\b/i); + my $alive; + if (!$term && $prot_persist) { # auto-chunk + $chunked = $alive = 2; + $h .= "Transfer-Encoding: chunked\r\n"; + # no need for "Connection: keep-alive" with HTTP/1.1 + } elsif ($term && ($prot_persist || ($conn =~ /\bkeep-alive\b/i))) { + $alive = 1; + $h .= "Connection: keep-alive\r\n"; + } else { + $alive = 0; + $h .= "Connection: close\r\n"; + } + $h .= 'Date: ' . http_date() . "\r\n\r\n"; if (($len || $chunked) && $env->{REQUEST_METHOD} ne 'HEAD') { - more($self, $h); + msg_more($self, $h); } else { - $self->write($h); + $self->write(\$h); } $alive; } -sub response_write { - my ($self, $env, $res) = @_; - my $alive = response_header_write($self, $env, $res); +# middlewares such as Deflater may write empty strings +sub chunked_write ($$) { + my $self = $_[0]; + return if $_[1] eq ''; + msg_more($self, sprintf("%x\r\n", length($_[1]))); + msg_more($self, $_[1]); - # middlewares such as Deflater may write empty strings - my $write = sub { $self->write($_[0]) if $_[0] ne '' }; - my $close = sub { - if ($alive) { - $self->event_write; # watch for readability if done + # use $self->write(\"\n\n") if you care about real-time + # streaming responses, public-inbox WWW does not. + msg_more($self, "\r\n"); +} + +sub identity_write ($$) { + my $self = $_[0]; + $self->write(\($_[1])) if $_[1] ne ''; +} + +sub response_done { + my ($self, $alive) = @_; + delete $self->{env}; # we're no longer busy + $self->write(\"0\r\n\r\n") if $alive == 2; + $self->write($alive ? $self->can('requeue') : \&close); +} + +sub getline_pull { + my ($self) = @_; + my $forward = $self->{forward}; + + # limit our own running time for fairness with other + # clients and to avoid buffering too much: + my $buf = eval { + local $/ = \65536; + $forward->getline; + } if $forward; + + if (defined $buf) { + # may close in PublicInbox::DS::write + if ($self->{alive} == 2) { + chunked_write($self, $buf); } else { - Danga::Socket::write($self, sub { $self->close }); - } - if (my $obj = $env->{'pi-httpd.inbox'}) { - # grace period for reaping resources - $WEAKEN->{"$obj"} = $obj; - $WEAKTIMER ||= Danga::Socket->AddTimer(60, *weaken_task); + identity_write($self, $buf); } - $self->{env} = undef; - }; - if (defined(my $body = $res->[2])) { - if (ref $body eq 'ARRAY') { - $write->($_) foreach @$body; - $close->(); - } elsif ($body->can('async_pass')) { # HTTPD::Async - # prevent us from reading the body faster than we - # can write to the client - my $restart_read = sub { $body->watch_read(1) }; - $body->async_pass(sub { - local $/ = \8192; - my $buf = $body->getline; - if (defined $buf) { - $write->($buf); - if ($self->{write_buf_size}) { - $body->watch_read(0); - $self->write($restart_read); - } - return; # continue waiting - } - $body->close; - $close->(); - }); - } else { - my $pull; - $pull = sub { - local $/ = \8192; - while (defined(my $buf = $body->getline)) { - $write->($buf); - if ($self->{write_buf_size}) { - $self->write($pull); - return; - } - } - $pull = undef; - $body->close(); - $close->(); - }; - $pull->(); + if ($self->{sock}) { + # autovivify wbuf + my $new_size = push(@{$self->{wbuf}}, \&getline_pull); + + # wbuf may be populated by {chunked,identity}_write() + # above, no need to rearm if so: + $self->requeue if $new_size == 1; + return; # likely } - } else { - # this is returned to the calling application: - Plack::Util::inline_object(write => $write, close => $close); + } elsif ($@) { + warn "response ->getline error: $@"; + $self->close; } -} - -use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; -sub more ($$) { - my $self = $_[0]; - if (MSG_MORE && !$self->{write_buf_size}) { - my $n = send($self->{sock}, $_[1], MSG_MORE); - if (defined $n) { - my $dlen = length($_[1]); - return 1 if $n == $dlen; # all done! - $_[1] = substr($_[1], $n, $dlen - $n); - # fall through to normal write: + # avoid recursion + if (delete $self->{forward}) { + eval { $forward->close }; + if ($@) { + warn "response ->close error: $@"; + $self->close; # idempotent } } - $self->write($_[1]); -} - -my $pipelineq = []; -my $next_tick; -sub process_pipelineq () { - $next_tick = undef; - my $q = $pipelineq; - $pipelineq = []; - rbuf_process($_) foreach @$q; + response_done($self, delete $self->{alive}); } -# overrides existing Danga::Socket method -sub event_write { - my ($self) = @_; - # only continue watching for readability when we are done writing: - return if $self->write(undef) != 1; - - if ($self->{rbuf} eq '') { # wait for next request - $self->watch_read(1); - } else { # avoid recursion for pipelined requests - push @$pipelineq, $self; - $next_tick ||= Danga::Socket->AddTimer(0, *process_pipelineq); +sub response_write { + my ($self, $env, $res) = @_; + my $alive = response_header_write($self, $env, $res); + if (defined(my $body = $res->[2])) { + if (ref $body eq 'ARRAY') { + if ($alive == 2) { + chunked_write($self, $_) for @$body; + } else { + identity_write($self, $_) for @$body; + } + response_done($self, $alive); + } else { + $self->{forward} = $body; + $self->{alive} = $alive; + getline_pull($self); # kick-off! + } + # these are returned to the calling application: + } elsif ($alive == 2) { + bless [ $self, $alive ], 'PublicInbox::HTTP::Chunked'; + } else { + bless [ $self, $alive ], 'PublicInbox::HTTP::Identity'; } } sub input_prepare { my ($self, $env) = @_; - my $input = $null_io; - my $len = $env->{CONTENT_LENGTH}; - if ($len) { - if ($len > $MAX_REQUEST_BUFFER) { - quit($self, 413); - return; - } - $input = IO::File->new_tmpfile; - } elsif (env_chunked($env)) { + my ($input, $len); + + # rfc 7230 3.3.2, 3.3.3,: favor Transfer-Encoding over Content-Length + my $hte = $env->{HTTP_TRANSFER_ENCODING}; + if (defined $hte) { + # rfc7230 3.3.3, point 3 says only chunked is accepted + # as the final encoding. Since neither public-inbox-httpd, + # git-http-backend, or our WWW-related code uses "gzip", + # "deflate" or "compress" as the Transfer-Encoding, we'll + # reject them: + return quit($self, 400) if $hte !~ /\Achunked\z/i; + $len = CHUNK_START; - $input = IO::File->new_tmpfile; + $input = tmpfile('http.input', $self->{sock}); + } else { + $len = $env->{CONTENT_LENGTH}; + if (defined $len) { + # rfc7230 3.3.3.4 + return quit($self, 400) if $len !~ /\A[0-9]+\z/; + return quit($self, 413) if $len > $MAX_REQUEST_BUFFER; + $input = $len ? tmpfile('http.input', $self->{sock}) + : $null_io; + } else { + $input = $null_io; + } } # TODO: expire idle clients on ENFILE / EMFILE - return unless $input; - - $env->{'psgi.input'} = $input; + $env->{'psgi.input'} = $input // return; $self->{env} = $env; $self->{input_left} = $len || 0; } -sub env_chunked { ($_[0]->{HTTP_TRANSFER_ENCODING} || '') =~ /\bchunked\b/i } +sub env_chunked { ($_[0]->{HTTP_TRANSFER_ENCODING} // '') =~ /\Achunked\z/i } sub write_err { - my ($self) = @_; - my $err = $self->{httpd}->{env}->{'psgi.errors'}; + my ($self, $len) = @_; my $msg = $! || '(zero write)'; - $err->print("error buffering to input: $msg\n"); + $msg .= " ($len bytes remaining)" if defined $len; + warn "error buffering to input: $msg"; quit($self, 500); } sub recv_err { - my ($self, $r, $len) = @_; - return $self->close if (defined $r && $r == 0); - if ($!{EAGAIN}) { + my ($self, $len) = @_; + if ($! == EAGAIN) { # epoll/kevent watch already set by do_read $self->{input_left} = $len; - return; - } - my $err = $self->{httpd}->{env}->{'psgi.errors'}; - $err->print("error reading for input: $! ($len bytes remaining)\n"); - quit($self, 500); -} - -sub write_in_full { - my ($fh, $rbuf, $len) = @_; - my $rv = 0; - my $off = 0; - while ($len > 0) { - my $w = syswrite($fh, $$rbuf, $len, $off); - return ($rv ? $rv : $w) unless $w; # undef or 0 - $rv += $w; - $off += $w; - $len -= $w; + } else { + warn "error reading input: $! ($len bytes remaining)"; } - $rv } -sub event_read_input_chunked { # unlikely... - my ($self) = @_; +sub read_input_chunked { # unlikely... + my ($self, $rbuf) = @_; + $rbuf //= $self->{rbuf} // (\(my $x = '')); my $input = $self->{env}->{'psgi.input'}; - my $sock = $self->{sock}; - my $len = $self->{input_left}; - $self->{input_left} = undef; - my $rbuf = \($self->{rbuf}); + my $len = delete $self->{input_left}; while (1) { # chunk start if ($len == CHUNK_ZEND) { $$rbuf =~ s/\A\r\n//s and - return app_dispatch($self, $input); + return app_dispatch($self, $input, $rbuf); + return quit($self, 400) if length($$rbuf) > 2; } if ($len == CHUNK_END) { @@ -391,9 +382,8 @@ sub event_read_input_chunked { # unlikely... } if ($len < 0) { # chunk header is trickled, read more - my $off = length($$rbuf); - my $r = sysread($sock, $$rbuf, 8192, $off); - return recv_err($self, $r, $len) unless $r; + $self->do_read($rbuf, 8192, length($$rbuf)) or + return recv_err($self, $len); # (implicit) goto chunk_start if $r > 0; } $len = CHUNK_ZEND if $len == 0; @@ -401,8 +391,8 @@ sub event_read_input_chunked { # unlikely... # drain the current chunk until ($len <= 0) { if ($$rbuf ne '') { - my $w = write_in_full($input, $rbuf, $len); - return write_err($self) unless $w; + my $w = syswrite($input, $$rbuf, $len); + return write_err($self, "$len chunk") if !$w; $len -= $w; if ($len == 0) { # we may have leftover data to parse @@ -417,8 +407,8 @@ sub event_read_input_chunked { # unlikely... } if ($$rbuf eq '') { # read more of current chunk - my $r = sysread($sock, $$rbuf, 8192); - return recv_err($self, $r, $len) unless $r; + $self->do_read($rbuf, 8192) or + return recv_err($self, $len); } } } @@ -427,25 +417,55 @@ sub event_read_input_chunked { # unlikely... sub quit { my ($self, $status) = @_; my $h = "HTTP/1.1 $status " . status_message($status) . "\r\n\r\n"; - $self->write($h); + $self->write(\$h); $self->close; + undef; # input_prepare expects this +} + +sub close { + my $self = $_[0]; + if (my $forward = delete $self->{forward}) { + eval { $forward->close }; + warn "forward ->close error: $@" if $@; + } + $self->SUPER::close; # PublicInbox::DS::close } -# callbacks for Danga::Socket +sub busy { # for graceful shutdown in PublicInbox::Daemon: + my ($self) = @_; + defined($self->{rbuf}) || exists($self->{env}) || defined($self->{wbuf}) +} + +# runs $cb on the next iteration of the event loop at earliest +sub next_step { + my ($self, $cb) = @_; + return unless exists $self->{sock}; + $self->requeue if 1 == push(@{$self->{wbuf}}, $cb); +} + +# Chunked and Identity packages are used for writing responses. +# They may be exposed to the PSGI application when the PSGI app +# returns a CODE ref for "push"-based responses +package PublicInbox::HTTP::Chunked; +use strict; -sub event_hup { $_[0]->close } -sub event_err { $_[0]->close } +sub write { + # ([$http], $buf) = @_; + PublicInbox::HTTP::chunked_write($_[0]->[0], $_[1]) +} sub close { - my $self = shift; - $self->{env} = undef; - $self->SUPER::close(@_); + # $_[0] = [$http, $alive] + PublicInbox::HTTP::response_done(@{$_[0]}); } -# for graceful shutdown in PublicInbox::Daemon: -sub busy () { - my ($self) = @_; - ($self->{rbuf} ne '' || $self->{env} || $self->{write_buf_size}); +package PublicInbox::HTTP::Identity; +use strict; +our @ISA = qw(PublicInbox::HTTP::Chunked); + +sub write { + # ([$http], $buf) = @_; + PublicInbox::HTTP::identity_write($_[0]->[0], $_[1]); } 1;