X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FHTTP.pm;h=b2c74cf378dcba2ab3f3cd3b6f9578ddb7feab58;hb=5c8909925072804901e9c3b45bbf25446d379e7b;hp=d2f04ba541e3865c2a9f6d0aa2a9b0781cadfc58;hpb=ee9ad0a8236af3bfee4df70516874bb0baa7c0f6;p=public-inbox.git diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index d2f04ba5..b2c74cf3 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2019 all contributors +# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # # Generic PSGI server for convenience. It aims to provide @@ -6,18 +6,27 @@ # to learn different ways to admin both NNTP and HTTP components. # There's nothing which depends on public-inbox, here. # Each instance of this class represents a HTTP client socket - +# +# fields: +# httpd: PublicInbox::HTTPD ref +# env: PSGI env hashref +# input_left: bytes left to read in request body (e.g. POST/PUT) +# remote_addr: remote IP address as a string (e.g. "127.0.0.1") +# remote_port: peer port +# forward: response body object, response to ->getline + ->close +# alive: HTTP keepalive state: +# 0: drop connection when done +# 1: keep connection when done +# 2: keep connection, chunk responses package PublicInbox::HTTP; use strict; -use warnings; -use base qw(PublicInbox::DS); -use fields qw(httpd env input_left remote_addr remote_port forward alive); -use bytes (); # only for bytes::length +use parent qw(PublicInbox::DS); use Fcntl qw(:seek); use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl +use Plack::Util; use HTTP::Status qw(status_message); use HTTP::Date qw(time2str); -use IO::Handle; +use IO::Handle; # ->write use PublicInbox::DS qw(msg_more); use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); use PublicInbox::Tmpfile; @@ -55,20 +64,18 @@ sub http_date () { sub new ($$$) { my ($class, $sock, $addr, $httpd) = @_; - my $self = fields::new($class); + my $self = bless { httpd => $httpd }, $class; my $ev = EPOLLIN; my $wbuf; - if (ref($sock) eq 'IO::Socket::SSL' && !$sock->accept_SSL) { + if ($sock->can('accept_SSL') && !$sock->accept_SSL) { return CORE::close($sock) if $! != EAGAIN; - $ev = PublicInbox::TLS::epollbit(); + $ev = PublicInbox::TLS::epollbit() or return CORE::close($sock); $wbuf = [ \&PublicInbox::DS::accept_tls_step ]; } - $self->SUPER::new($sock, $ev | EPOLLONESHOT); - $self->{httpd} = $httpd; $self->{wbuf} = $wbuf if $wbuf; ($self->{remote_addr}, $self->{remote_port}) = PublicInbox::Daemon::host_with_port($addr); - $self; + $self->SUPER::new($sock, $ev | EPOLLONESHOT); } sub event_step { # called by PublicInbox::DS @@ -79,9 +86,9 @@ sub event_step { # called by PublicInbox::DS # only read more requests if we've drained the write buffer, # otherwise we can be buffering infinitely w/o backpressure - return read_input($self) if defined $self->{env}; + return read_input($self) if ref($self->{env}); my $rbuf = $self->{rbuf} // (\(my $x = '')); - $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or return; + $self->do_read($rbuf, 8192, length($$rbuf)) or return; rbuf_process($self, $rbuf); } @@ -96,7 +103,7 @@ sub rbuf_process { # (they are rarely-used and git (as of 2.7.2) does not use them) if ($r == -1 || $env{HTTP_TRAILER} || # this length-check is necessary for PURE_PERL=1: - ($r == -2 && bytes::length($$rbuf) > 0x4000)) { + ($r == -2 && length($$rbuf) > 0x4000)) { return quit($self, 400); } if ($r < 0) { # incomplete @@ -113,7 +120,7 @@ sub rbuf_process { # IO::Handle::write returns boolean, this returns bytes written: sub xwrite ($$$) { my ($fh, $rbuf, $max) = @_; - my $w = bytes::length($$rbuf); + my $w = length($$rbuf); $w = $max if $w > $max; $fh->write($$rbuf, $w) or return; $w; @@ -123,7 +130,6 @@ sub read_input ($;$) { my ($self, $rbuf) = @_; $rbuf //= $self->{rbuf} // (\(my $x = '')); my $env = $self->{env}; - return if $env->{REMOTE_ADDR}; # in app dispatch return read_input_chunked($self, $rbuf) if env_chunked($env); # env->{CONTENT_LENGTH} (identity) @@ -152,9 +158,10 @@ sub app_dispatch { my ($self, $input, $rbuf) = @_; $self->rbuf_idle($rbuf); my $env = $self->{env}; + $self->{env} = undef; # for exists() check in ->busy $env->{REMOTE_ADDR} = $self->{remote_addr}; $env->{REMOTE_PORT} = $self->{remote_port}; - if (my $host = $env->{HTTP_HOST}) { + if (defined(my $host = $env->{HTTP_HOST})) { $host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1; $env->{SERVER_NAME} = $host; } @@ -173,7 +180,10 @@ sub app_dispatch { response_write($self, $env, $res); } }; - $self->close if $@; + if ($@) { + err($self, "response_write error: $@"); + $self->close; + } } sub response_header_write { @@ -225,7 +235,7 @@ sub response_header_write { sub chunked_write ($$) { my $self = $_[0]; return if $_[1] eq ''; - msg_more($self, sprintf("%x\r\n", bytes::length($_[1]))); + msg_more($self, sprintf("%x\r\n", length($_[1]))); msg_more($self, $_[1]); # use $self->write(\"\n\n") if you care about real-time @@ -276,12 +286,12 @@ sub getline_pull { } if ($self->{sock}) { - my $wbuf = $self->{wbuf} //= []; - push @$wbuf, \&getline_pull; + # autovivify wbuf + my $new_size = push(@{$self->{wbuf}}, \&getline_pull); # wbuf may be populated by {chunked,identity}_write() # above, no need to rearm if so: - $self->requeue if scalar(@$wbuf) == 1; + $self->requeue if $new_size == 1; return; # likely } } elsif ($@) { @@ -331,19 +341,31 @@ sub input_tmpfile ($) { sub input_prepare { my ($self, $env) = @_; - my $input; - my $len = $env->{CONTENT_LENGTH}; - if ($len) { - if ($len > $MAX_REQUEST_BUFFER) { - quit($self, 413); - return; - } - $input = input_tmpfile($self); - } elsif (env_chunked($env)) { + my ($input, $len); + + # rfc 7230 3.3.2, 3.3.3,: favor Transfer-Encoding over Content-Length + my $hte = $env->{HTTP_TRANSFER_ENCODING}; + if (defined $hte) { + # rfc7230 3.3.3, point 3 says only chunked is accepted + # as the final encoding. Since neither public-inbox-httpd, + # git-http-backend, or our WWW-related code uses "gzip", + # "deflate" or "compress" as the Transfer-Encoding, we'll + # reject them: + return quit($self, 400) if $hte !~ /\Achunked\z/i; + $len = CHUNK_START; $input = input_tmpfile($self); } else { - $input = $null_io; + $len = $env->{CONTENT_LENGTH}; + if (defined $len) { + # rfc7230 3.3.3.4 + return quit($self, 400) if $len !~ /\A[0-9]+\z/; + + return quit($self, 413) if $len > $MAX_REQUEST_BUFFER; + $input = $len ? input_tmpfile($self) : $null_io; + } else { + $input = $null_io; + } } # TODO: expire idle clients on ENFILE / EMFILE @@ -354,7 +376,7 @@ sub input_prepare { $self->{input_left} = $len || 0; } -sub env_chunked { ($_[0]->{HTTP_TRANSFER_ENCODING} || '') =~ /\bchunked\b/i } +sub env_chunked { ($_[0]->{HTTP_TRANSFER_ENCODING} // '') =~ /\Achunked\z/i } sub err ($$) { eval { $_[0]->{httpd}->{env}->{'psgi.errors'}->print($_[1]."\n") }; @@ -388,12 +410,12 @@ sub read_input_chunked { # unlikely... $$rbuf =~ s/\A\r\n//s and return app_dispatch($self, $input, $rbuf); - return quit($self, 400) if bytes::length($$rbuf) > 2; + return quit($self, 400) if length($$rbuf) > 2; } if ($len == CHUNK_END) { if ($$rbuf =~ s/\A\r\n//s) { $len = CHUNK_START; - } elsif (bytes::length($$rbuf) > 2) { + } elsif (length($$rbuf) > 2) { return quit($self, 400); } } @@ -403,14 +425,14 @@ sub read_input_chunked { # unlikely... if (($len + -s $input) > $MAX_REQUEST_BUFFER) { return quit($self, 413); } - } elsif (bytes::length($$rbuf) > CHUNK_MAX_HDR) { + } elsif (length($$rbuf) > CHUNK_MAX_HDR) { return quit($self, 400); } # will break from loop since $len >= 0 } if ($len < 0) { # chunk header is trickled, read more - $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or + $self->do_read($rbuf, 8192, length($$rbuf)) or return recv_err($self, $len); # (implicit) goto chunk_start if $r > 0; } @@ -447,11 +469,11 @@ sub quit { my $h = "HTTP/1.1 $status " . status_message($status) . "\r\n\r\n"; $self->write(\$h); $self->close; + undef; # input_prepare expects this } sub close { my $self = $_[0]; - delete $self->{env}; # prevent circular references if (my $forward = delete $self->{forward}) { eval { $forward->close }; err($self, "forward ->close error: $@") if $@; @@ -462,7 +484,14 @@ sub close { # for graceful shutdown in PublicInbox::Daemon: sub busy () { my ($self) = @_; - ($self->{rbuf} || $self->{env} || $self->{wbuf}); + ($self->{rbuf} || exists($self->{env}) || $self->{wbuf}); +} + +# runs $cb on the next iteration of the event loop at earliest +sub next_step { + my ($self, $cb) = @_; + return unless exists $self->{sock}; + $self->requeue if 1 == push(@{$self->{wbuf}}, $cb); } # Chunked and Identity packages are used for writing responses.