X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FHTTP.pm;h=e19c592c99612ae29960676e5112166d5f5642eb;hb=5422a844b7384c32b3532d128e15e0b50d24435b;hp=4d771f2bfcdcee160c17e2743e85c58a96a04c2e;hpb=9b12760f530967f7200d04c9d8b5c0b27d9e1283;p=public-inbox.git diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 4d771f2b..e19c592c 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -4,20 +4,20 @@ # Generic PSGI server for convenience. It aims to provide # a consistent experience for public-inbox admins so they don't have # to learn different ways to admin both NNTP and HTTP components. -# There's nothing public-inbox-specific, here. +# There's nothing which depends on public-inbox, here. # Each instance of this class represents a HTTP client socket package PublicInbox::HTTP; use strict; use warnings; use base qw(Danga::Socket); -use fields qw(httpd env rbuf input_left); +use fields qw(httpd env rbuf input_left remote_addr remote_port forward pull); use Fcntl qw(:seek); -use HTTP::Parser::XS qw(parse_http_request); # supports pure Perl fallback +use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl use HTTP::Status qw(status_message); use HTTP::Date qw(time2str); +use Scalar::Util qw(weaken); use IO::File; -my $null_io = IO::File->new('/dev/null', '<'); use constant { CHUNK_START => -1, # [a-f0-9]+\r\n CHUNK_END => -2, # \r\n @@ -25,12 +25,49 @@ use constant { CHUNK_MAX_HDR => 256, }; +# FIXME: duplicated code with NNTP.pm, layering violation +my $WEAKEN = {}; # string(inbox) -> inbox +my $weakt; +sub weaken_task () { + $weakt = undef; + $_->weaken_all for values %$WEAKEN; + $WEAKEN = {}; +} + +my $pipelineq = []; +my $pipet; +sub process_pipelineq () { + my $q = $pipelineq; + $pipet = undef; + $pipelineq = []; + foreach (@$q) { + next if $_->{closed}; + rbuf_process($_); + } +} + +# Use the same configuration parameter as git since this is primarily +# a slow-client sponge for git-http-backend +# TODO: support per-respository http.maxRequestBuffer somehow... +our $MAX_REQUEST_BUFFER = $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} || + (10 * 1024 * 1024); + +my $null_io = IO::File->new('/dev/null', '<'); +my $http_date; +my $prev = 0; +sub http_date () { + my $now = time; + $now == $prev ? $http_date : ($http_date = time2str($prev = $now)); +} + sub new ($$$) { my ($class, $sock, $addr, $httpd) = @_; my $self = fields::new($class); $self->SUPER::new($sock); $self->{httpd} = $httpd; $self->{rbuf} = ''; + ($self->{remote_addr}, $self->{remote_port}) = + PublicInbox::Daemon::host_with_port($addr); $self->watch_read(1); $self; } @@ -61,10 +98,17 @@ sub rbuf_process { # We do not support Trailers in chunked requests, for now # (they are rarely-used and git (as of 2.7.2) does not use them) - return $self->quit(400) if $r == -1 || $env{HTTP_TRAILER}; + if ($r == -1 || $env{HTTP_TRAILER} || + # this length-check is necessary for PURE_PERL=1: + ($r == -2 && length($self->{rbuf}) > 0x4000)) { + return quit($self, 400); + } return $self->watch_read(1) if $r < 0; # incomplete $self->{rbuf} = substr($self->{rbuf}, $r); + my $len = input_prepare($self, \%env); + defined $len or return write_err($self); # EMFILE/ENFILE + $len ? event_read_input($self) : app_dispatch($self); } @@ -83,7 +127,7 @@ sub event_read_input ($) { while ($len > 0) { if ($$rbuf ne '') { my $w = write_in_full($input, $rbuf, $len); - return $self->write_err unless $w; + return write_err($self) unless $w; $len -= $w; die "BUG: $len < 0 (w=$w)" if $len < 0; if ($len == 0) { # next request may be pipelined @@ -93,23 +137,29 @@ sub event_read_input ($) { $$rbuf = ''; } my $r = sysread($sock, $$rbuf, 8192); - return $self->recv_err($r, $len) unless $r; + return recv_err($self, $r, $len) unless $r; # continue looping if $r > 0; } - app_dispatch($self); + app_dispatch($self, $input); } -sub app_dispatch ($) { - my ($self) = @_; +sub app_dispatch { + my ($self, $input) = @_; $self->watch_read(0); my $env = $self->{env}; - $env->{REMOTE_ADDR} = $self->peer_ip_string; # Danga::Socket - $env->{REMOTE_PORT} = $self->{peer_port}; # set by peer_ip_string + $env->{REMOTE_ADDR} = $self->{remote_addr}; + $env->{REMOTE_PORT} = $self->{remote_port}; if (my $host = $env->{HTTP_HOST}) { $host =~ s/:(\d+)\z// and $env->{SERVER_PORT} = $1; $env->{SERVER_NAME} = $host; } - sysseek($env->{'psgi.input'}, 0, SEEK_SET) or die "input seek failed: $!"; + if (defined $input) { + sysseek($input, 0, SEEK_SET) or + die "BUG: psgi.input seek failed: $!"; + } + # note: NOT $self->{sock}, we want our close (+ Danga::Socket::close), + # to do proper cleanup: + $env->{'psgix.io'} = $self; # only for ->close my $res = Plack::Util::run_app($self->{httpd}->{app}, $env); eval { if (ref($res) eq 'CODE') { @@ -138,42 +188,118 @@ sub response_header_write { if ($k =~ /\ATransfer-Encoding\z/i && $v =~ /\bchunked\b/i) { $chunked = 1; } - $h .= "$k: $v\r\n"; } my $conn = $env->{HTTP_CONNECTION} || ''; - my $alive = (defined($len) || $chunked) && - ($proto eq 'HTTP/1.1' && $conn !~ /\bclose\b/i) || - ($conn =~ /\bkeep-alive\b/i); - - $h .= 'Connection: ' . ($alive ? 'keep-alive' : 'close'); - $h .= "\r\nDate: " . time2str(time) . "\r\n\r\n"; + my $term = defined($len) || $chunked; + my $prot_persist = ($proto eq 'HTTP/1.1') && ($conn !~ /\bclose\b/i); + my $alive; + if (!$term && $prot_persist) { # auto-chunk + $chunked = $alive = 2; + $h .= "Transfer-Encoding: chunked\r\n"; + # no need for "Connection: keep-alive" with HTTP/1.1 + } elsif ($term && ($prot_persist || ($conn =~ /\bkeep-alive\b/i))) { + $alive = 1; + $h .= "Connection: keep-alive\r\n"; + } else { + $alive = 0; + $h .= "Connection: close\r\n"; + } + $h .= 'Date: ' . http_date() . "\r\n\r\n"; if (($len || $chunked) && $env->{REQUEST_METHOD} ne 'HEAD') { more($self, $h); } else { $self->write($h); } - ($alive, $chunked); + $alive; +} + +# middlewares such as Deflater may write empty strings +sub chunked_wcb ($) { + my ($self) = @_; + sub { + return if $_[0] eq ''; + more($self, sprintf("%x\r\n", bytes::length($_[0]))); + more($self, $_[0]); + + # use $self->write("\n\n") if you care about real-time + # streaming responses, public-inbox WWW does not. + more($self, "\r\n"); + } +} + +sub identity_wcb ($) { + my ($self) = @_; + sub { $self->write(\($_[0])) if $_[0] ne '' } +} + +sub next_request ($) { + my ($self) = @_; + $self->watch_write(0); + if ($self->{rbuf} eq '') { # wait for next request + $self->watch_read(1); + } else { # avoid recursion for pipelined requests + push @$pipelineq, $self; + $pipet ||= PublicInbox::EvCleanup::asap(*process_pipelineq); + } +} + +sub response_done ($$) { + my ($self, $alive) = @_; + my $env = $self->{env}; + $self->{env} = undef; + $self->write("0\r\n\r\n") if $alive == 2; + $self->write(sub { $alive ? next_request($self) : $self->close }); + + # FIXME: layering violation + if (my $obj = $env->{'pi-httpd.inbox'}) { + # grace period for reaping resources + $WEAKEN->{"$obj"} = $obj; + PublicInbox::EvCleanup::later(*weaken_task); + } +} + +sub getline_response { + my ($self, $body, $write, $close) = @_; + $self->{forward} = $body; + weaken($self); + my $pull = $self->{pull} = sub { + local $/ = \8192; + my $forward = $self->{forward}; + # limit our own running time for fairness with other + # clients and to avoid buffering too much: + while ($forward && defined(my $buf = $forward->getline)) { + $write->($buf); + last if $self->{closed}; + if ($self->{write_buf_size}) { + $self->write($self->{pull}); + } else { + PublicInbox::EvCleanup::asap($self->{pull}); + } + return; + } + $self->{forward} = $self->{pull} = undef; + $forward->close if $forward; # avoid recursion + $close->(); + }; + $pull->(); } sub response_write { my ($self, $env, $res) = @_; - my ($alive, $chunked) = response_header_write($self, $env, $res); - my $write = sub { $self->write($_[0]) }; - my $close = sub { - if ($alive) { - $self->event_write; # watch for readability if done + my $alive = response_header_write($self, $env, $res); + + my $write = $alive == 2 ? chunked_wcb($self) : identity_wcb($self); + my $close = sub { response_done($self, $alive) }; + if (defined(my $body = $res->[2])) { + if (ref $body eq 'ARRAY') { + $write->($_) foreach @$body; + $close->(); } else { - $self->write(sub { $self->close }); + getline_response($self, $body, $write, $close); } - $self->{env} = undef; - }; - - if (defined $res->[2]) { - Plack::Util::foreach($res->[2], $write); - $close->(); } else { # this is returned to the calling application: Plack::Util::inline_object(write => $write, close => $close); @@ -183,6 +309,7 @@ sub response_write { use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; sub more ($$) { my $self = $_[0]; + return if $self->{closed}; if (MSG_MORE && !$self->{write_buf_size}) { my $n = send($self->{sock}, $_[1], MSG_MORE); if (defined $n) { @@ -195,44 +322,37 @@ sub more ($$) { $self->write($_[1]); } -# overrides existing Danga::Socket method -sub event_write { - my ($self) = @_; - # only continue watching for readability when we are done writing: - return if $self->write(undef) != 1; - - if ($self->{rbuf} eq '') { - $self->watch_read(1); - } else { - # avoid recursion for pipelined requests - Danga::Socket->AddTimer(0, sub { rbuf_process($self) }); - } -} - sub input_prepare { my ($self, $env) = @_; my $input = $null_io; my $len = $env->{CONTENT_LENGTH}; if ($len) { + if ($len > $MAX_REQUEST_BUFFER) { + quit($self, 413); + return; + } $input = IO::File->new_tmpfile; } elsif (env_chunked($env)) { - $input = IO::File->new_tmpfile; $len = CHUNK_START; + $input = IO::File->new_tmpfile; } - binmode $input; + + # TODO: expire idle clients on ENFILE / EMFILE + return unless $input; + $env->{'psgi.input'} = $input; $self->{env} = $env; - $self->{input_left} = $len; + $self->{input_left} = $len || 0; } sub env_chunked { ($_[0]->{HTTP_TRANSFER_ENCODING} || '') =~ /\bchunked\b/i } sub write_err { my ($self) = @_; - my $err = $self->{env}->{'psgi.errors'}; + my $err = $self->{httpd}->{env}->{'psgi.errors'}; my $msg = $! || '(zero write)'; $err->print("error buffering to input: $msg\n"); - $self->quit(500); + quit($self, 500); } sub recv_err { @@ -242,9 +362,9 @@ sub recv_err { $self->{input_left} = $len; return; } - my $err = $self->{env}->{'psgi.errors'}; + my $err = $self->{httpd}->{env}->{'psgi.errors'}; $err->print("error reading for input: $! ($len bytes remaining)\n"); - $self->quit(500); + quit($self, 500); } sub write_in_full { @@ -271,21 +391,25 @@ sub event_read_input_chunked { # unlikely... while (1) { # chunk start if ($len == CHUNK_ZEND) { - return app_dispatch($self) if $$rbuf =~ s/\A\r\n//s; - return $self->quit(400) if length($$rbuf) > 2; + $$rbuf =~ s/\A\r\n//s and + return app_dispatch($self, $input); + return quit($self, 400) if length($$rbuf) > 2; } if ($len == CHUNK_END) { if ($$rbuf =~ s/\A\r\n//s) { $len = CHUNK_START; } elsif (length($$rbuf) > 2) { - return $self->quit(400); + return quit($self, 400); } } if ($len == CHUNK_START) { if ($$rbuf =~ s/\A([a-f0-9]+).*?\r\n//i) { $len = hex $1; + if (($len + -s $input) > $MAX_REQUEST_BUFFER) { + return quit($self, 413); + } } elsif (length($$rbuf) > CHUNK_MAX_HDR) { - return $self->quit(400); + return quit($self, 400); } # will break from loop since $len >= 0 } @@ -293,7 +417,7 @@ sub event_read_input_chunked { # unlikely... if ($len < 0) { # chunk header is trickled, read more my $off = length($$rbuf); my $r = sysread($sock, $$rbuf, 8192, $off); - return $self->recv_err($r, $len) unless $r; + return recv_err($self, $r, $len) unless $r; # (implicit) goto chunk_start if $r > 0; } $len = CHUNK_ZEND if $len == 0; @@ -302,7 +426,7 @@ sub event_read_input_chunked { # unlikely... until ($len <= 0) { if ($$rbuf ne '') { my $w = write_in_full($input, $rbuf, $len); - return $self->write_err unless $w; + return write_err($self) unless $w; $len -= $w; if ($len == 0) { # we may have leftover data to parse @@ -318,7 +442,7 @@ sub event_read_input_chunked { # unlikely... if ($$rbuf eq '') { # read more of current chunk my $r = sysread($sock, $$rbuf, 8192); - return $self->recv_err($r, $len) unless $r; + return recv_err($self, $r, $len) unless $r; } } } @@ -336,6 +460,16 @@ sub quit { sub event_hup { $_[0]->close } sub event_err { $_[0]->close } +sub close { + my $self = shift; + my $forward = $self->{forward}; + my $env = $self->{env}; + delete $env->{'psgix.io'} if $env; # prevent circular referernces + $self->{pull} = $self->{forward} = $self->{env} = undef; + $forward->close if $forward; + $self->SUPER::close(@_); +} + # for graceful shutdown in PublicInbox::Daemon: sub busy () { my ($self) = @_;