X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWwwStatic.pm;h=e1f536f3f21c1d7ce63ea74f140d2646fb1284b1;hb=b99e2465205a8f27801066b1e914bd4091406fa2;hp=76e50c789b7336a653e99de9e46afbe67dee9203;hpb=07d0e2d336d4697c3284fe3dd59dae0583984e23;p=public-inbox.git diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm index 76e50c78..e1f536f3 100644 --- a/lib/PublicInbox/WwwStatic.pm +++ b/lib/PublicInbox/WwwStatic.pm @@ -1,9 +1,55 @@ # Copyright (C) 2016-2019 all contributors # License: AGPL-3.0+ +# This package can either be a PSGI response body for a static file +# OR a standalone PSGI app which returns the above PSGI response body +# (or an HTML directory listing). +# +# It encapsulates the "autoindex", "index", and "gzip_static" +# functionality of nginx. package PublicInbox::WwwStatic; use strict; -use Fcntl qw(:seek); +use parent qw(Exporter); +use bytes (); +use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK); +use POSIX qw(strftime); +use HTTP::Date qw(time2str); +use HTTP::Status qw(status_message); +use Errno qw(EACCES ENOTDIR ENOENT); +use URI::Escape qw(uri_escape_utf8); +use PublicInbox::Hval qw(ascii_html); +use Plack::MIME; +our @EXPORT_OK = qw(@NO_CACHE r path_info_raw); + +our @NO_CACHE = ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT', + 'Pragma', 'no-cache', + 'Cache-Control', 'no-cache, max-age=0, must-revalidate'); + +our $STYLE = <<'EOF'; + +EOF + +$STYLE =~ s/^\s*//gm; +$STYLE =~ tr/\n//d; + +sub r ($;$) { + my ($code, $msg) = @_; + $msg ||= status_message($code); + [ $code, [ qw(Content-Type text/plain), 'Content-Length', length($msg), + @NO_CACHE ], + [ $msg ] ] +} sub prepare_range { my ($env, $in, $h, $beg, $end, $size) = @_; @@ -35,7 +81,9 @@ sub prepare_range { if ($len <= 0) { $code = 416; } else { - sysseek($in, $beg, SEEK_SET) or return [ 500, [], [] ]; + if ($in) { + sysseek($in, $beg, SEEK_SET) or return r(500); + } push @$h, qw(Accept-Ranges bytes Content-Range); push @$h, "bytes $beg-$end/$size"; @@ -43,63 +91,228 @@ sub prepare_range { $env->{'psgix.no-compress'} = 1; } } + if ($code == 416) { + push @$h, 'Content-Range', "bytes */$size"; + return [ 416, $h, [] ]; + } ($code, $len); } -sub response { +# returns a PSGI arrayref response iff .gz and non-.gz mtimes match +sub try_gzip_static ($$$$) { my ($env, $h, $path, $type) = @_; - return unless -f $path && -r _; # just in case it's a FIFO :P + return unless ($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/i; + my $mtime; + return unless -f $path && defined(($mtime = (stat(_))[9])); + my $gz = "$path.gz"; + return unless -f $gz && (stat(_))[9] == $mtime; + my $res = response($env, $h, $gz, $type); + return if ($res->[0] > 300 || $res->[0] < 200); + push @{$res->[1]}, qw(Cache-Control no-transform Content-Encoding gzip); + $res; +} + +sub response ($$$;$) { + my ($env, $h, $path, $type) = @_; + $type //= Plack::MIME->mime_type($path) // 'application/octet-stream'; + if ($path !~ /\.gz\z/i) { + if (my $res = try_gzip_static($env, $h, $path, $type)) { + return $res; + } + } + + my $in; + if ($env->{REQUEST_METHOD} eq 'HEAD') { + return r(404) unless -f $path && -r _; # in case it's a FIFO :P + } else { # GET, callers should've already filtered out other methods + if (!sysopen($in, $path, O_RDONLY|O_NONBLOCK)) { + return r(404) if $! == ENOENT || $! == ENOTDIR; + return r(403) if $! == EACCES; + return r(500); + } + return r(404) unless -f $in; + } + my $size = -s _; # bare "_" reuses "struct stat" from "-f" above + my $mtime = time2str((stat(_))[9]); + + if (my $ims = $env->{HTTP_IF_MODIFIED_SINCE}) { + return [ 304, [], [] ] if $mtime eq $ims; + } - # TODO: If-Modified-Since and Last-Modified? - open my $in, '<', $path or return; - my $size = -s $in; my $len = $size; my $code = 200; push @$h, 'Content-Type', $type; if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) { ($code, $len) = prepare_range($env, $in, $h, $1, $2, $size); - if ($code == 416) { - push @$h, 'Content-Range', "bytes */$size"; - return [ 416, $h, [] ]; - } + return $code if ref($code); } - push @$h, 'Content-Length', $len; - my $body = bless { + push @$h, 'Content-Length', $len, 'Last-Modified', $mtime; + my $body = $in ? bless { initial_rd => 65536, len => $len, in => $in, path => $path, env => $env, - }, __PACKAGE__; + }, __PACKAGE__ : []; [ $code, $h, $body ]; } -# called by PSGI servers: +# called by PSGI servers on each response chunk: sub getline { my ($self) = @_; - my $len = $self->{len}; - return if $len == 0; + my $len = $self->{len} or return; # undef, tells server we're done my $n = delete($self->{initial_rd}) // 8192; $n = $len if $len < $n; my $r = sysread($self->{in}, my $buf, $n); - if (!defined $r) { - $self->{env}->{'psgi.errors'}->print( - "$self->{path} read error: $!\n"); - } elsif ($r > 0) { # success! + if (defined $r && $r > 0) { # success! $self->{len} = $len - $r; return $buf; - } else { - $self->{env}->{'psgi.errors'}->print( - "$self->{path} EOF with $len bytes left\n"); } + my $m = defined $r ? "EOF with $len bytes left" : "read error: $!"; + die "$self->{path} $m, dropping client socket\n"; +} + +sub close {} # noop, called by PSGI server, just let everything go out-of-scope + +# OO interface for use as a Plack app +sub new { + my ($class, %opt) = @_; + my $index = $opt{'index'} // [ 'index.html' ]; + $index = [ $index ] if defined($index) && ref($index) ne 'ARRAY'; + $index = undef if scalar(@$index) == 0; + my $style = $opt{style}; + if (defined $style) { + $style = \$style unless ref($style); + } + my $docroot = $opt{docroot}; + die "`docroot' not set" unless defined($docroot) && $docroot ne ''; + bless { + docroot => $docroot, + index => $index, + autoindex => $opt{autoindex}, + style => $style // \$STYLE, + }, $class; +} + +# PATH_INFO is decoded, and we want the undecoded original +my %path_re_cache; +sub path_info_raw ($) { + my ($env) = @_; + my $sn = $env->{SCRIPT_NAME}; + my $re = $path_re_cache{$sn} ||= do { + $sn = '/'.$sn unless index($sn, '/') == 0; + $sn =~ s!/\z!!; + qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!; + }; + $env->{REQUEST_URI} =~ $re ? $1 : $env->{PATH_INFO}; +} - # drop the client on error - if (my $io = $self->{env}->{'psgix.io'}) { - $io->close; # this is PublicInbox::DS::close +sub redirect_slash ($) { + my ($env) = @_; + my $url = $env->{'psgi.url_scheme'} . '://'; + my $host_port = $env->{HTTP_HOST} // + "$env->{SERVER_NAME}:$env->{SERVER_PORT}"; + $url .= $host_port . path_info_raw($env) . '/'; + my $body = "Redirecting to $url\n"; + [ 302, [ qw(Content-Type text/plain), 'Location', $url, + 'Content-Length', length($body) ], [ $body ] ] +} + +sub human_size ($) { + my ($size) = @_; + my $suffix = ''; + for my $s (qw(K M G T P)) { + last if $size < 1024; + $size /= 1024; + if ($size <= 1024) { + $suffix = $s; + last; + } } - undef; + sprintf('%lu', $size).$suffix; } -sub close {} # noop, just let everything go out-of-scope +# by default, this returns "index.html" if it exists for a given directory +# It'll generate a directory listing, (autoindex). +# May be disabled by setting autoindex => 0 +sub dir_response ($$$) { + my ($self, $env, $fs_path) = @_; + if (my $index = $self->{'index'}) { # serve index.html or similar + for my $html (@$index) { + my $p = $fs_path . $html; + my $res = response($env, [], $p); + return $res if $res->[0] != 404; + } + } + return r(404) unless $self->{autoindex}; + opendir(my $dh, $fs_path) or do { + return r(404) if ($! == ENOENT || $! == ENOTDIR); + return r(403) if $! == EACCES; + return r(500); + }; + my @entries = grep(!/\A\./, readdir($dh)); + $dh = undef; + my (%dirs, %other, %want_gz); + my $path_info = $env->{PATH_INFO}; + push @entries, '..' if $path_info ne '/'; + for my $base (@entries) { + my $href = ascii_html(uri_escape_utf8($base)); + my $name = ascii_html($base); + my @st = stat($fs_path . $base) or next; # unlikely + my ($gzipped, $uncompressed, $hsize); + my $entry = ''; + my $mtime = $st[9]; + if (-d _) { + $href .= '/'; + $name .= '/'; + $hsize = '-'; + $dirs{"$base\0$mtime"} = \$entry; + } elsif (-f _) { + $other{"$base\0$mtime"} = \$entry; + if ($base !~ /\.gz\z/i) { + $want_gz{"$base.gz\0$mtime"} = undef; + } + $hsize = human_size($st[7]); + } else { + next; + } + # 54 = 80 - (SP length(strftime(%Y-%m-%d %k:%M)) SP human_size) + $hsize = sprintf('% 8s', $hsize); + my $pad = 54 - length($name); + $pad = 1 if $pad <= 0; + $entry .= qq($name) . (' ' x $pad); + $mtime = strftime('%Y-%m-%d %k:%M', gmtime($mtime)); + $entry .= $mtime . $hsize; + } + + # filter out '.gz' files as long as the mtime matches the + # uncompressed version + delete(@other{keys %want_gz}); + @entries = ((map { ${$dirs{$_}} } sort keys %dirs), + (map { ${$other{$_}} } sort keys %other)); + + my $path_info_html = ascii_html($path_info); + my $body = "Index of $path_info_html" . + ${$self->{style}} . + "
Index of $path_info_html

\n";
+	$body .= join("\n", @entries) . "

\n"; + [ 200, [ qw(Content-Type text/html + Content-Length), bytes::length($body) ], [ $body ] ] +} + +sub call { # PSGI app endpoint + my ($self, $env) = @_; + return r(405) if $env->{REQUEST_METHOD} !~ /\A(?:GET|HEAD)\z/; + my $path_info = $env->{PATH_INFO}; + return r(403) if index($path_info, "\0") >= 0; + my (@parts) = split(m!/+!, $path_info, -1); + return r(403) if grep(/\A(?:\.\.)\z/, @parts) || $parts[0] ne ''; + + my $fs_path = join('/', $self->{docroot}, @parts); + return dir_response($self, $env, $fs_path) if $parts[-1] eq ''; + + my $res = response($env, [], $fs_path); + $res->[0] == 404 && -d $fs_path ? redirect_slash($env) : $res; +} 1;