# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # This package can either be a PSGI response body for a static file # OR a standalone PSGI app which returns the above PSGI response body # (or an HTML directory listing). # # It encapsulates the "autoindex", "index", and "gzip_static" # functionality of nginx. package PublicInbox::WwwStatic; use strict; use v5.10.1; use parent qw(Exporter); use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK); use POSIX qw(strftime); use HTTP::Date qw(time2str); use HTTP::Status qw(status_message); use Errno qw(EACCES ENOTDIR ENOENT); use URI::Escape qw(uri_escape_utf8); use PublicInbox::GzipFilter qw(gzf_maybe); use PublicInbox::Hval qw(ascii_html); use Plack::MIME; our @EXPORT_OK = qw(@NO_CACHE r path_info_raw); our @NO_CACHE = ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT', 'Pragma', 'no-cache', 'Cache-Control', 'no-cache, max-age=0, must-revalidate'); our $STYLE = <<'EOF'; EOF $STYLE =~ s/^\s*//gm; $STYLE =~ tr/\n//d; sub r ($;$) { my ($code, $msg) = @_; $msg ||= status_message($code); [ $code, [ qw(Content-Type text/plain), 'Content-Length', length($msg), @NO_CACHE ], [ $msg ] ] } sub getline_response ($$$$$) { my ($env, $in, $off, $len, $path) = @_; my $r = bless {}, __PACKAGE__; if ($env->{'pi-httpd.async'}) { # public-inbox-httpd-only mode $env->{'psgix.no-compress'} = 1; # do not chunk response %$r = ( bypass => [$in, $off, $len, $env->{'psgix.io'}] ); } else { %$r = ( in => $in, off => $off, len => $len, path => $path ); } $r; } sub setup_range { my ($env, $in, $h, $beg, $end, $size) = @_; my $code = 200; my $len = $size; if ($beg eq '') { if ($end ne '') { # "bytes=-$end" => last N bytes $beg = $size - $end; $beg = 0 if $beg < 0; $end = $size - 1; $code = 206; } else { $code = 416; } } else { if ($beg > $size) { $code = 416; } elsif ($end eq '' || $end >= $size) { $end = $size - 1; $code = 206; } elsif ($end < $size) { $code = 206; } else { $code = 416; } } if ($code == 206) { $len = $end - $beg + 1; if ($len <= 0) { $code = 416; } else { push @$h, qw(Accept-Ranges bytes Content-Range); push @$h, "bytes $beg-$end/$size"; # FIXME: Plack::Middleware::Deflater bug? $env->{'psgix.no-compress'} = 1; } } if ($code == 416) { push @$h, 'Content-Range', "bytes */$size"; return [ 416, $h, [] ]; } ($code, $beg, $len); } # returns a PSGI arrayref response iff .gz and non-.gz mtimes match sub try_gzip_static ($$$$) { my ($env, $h, $path, $type) = @_; return unless ($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/i; my $mtime; return unless -f $path && defined(($mtime = (stat(_))[9])); my $gz = "$path.gz"; return unless -f $gz && (stat(_))[9] == $mtime; my $res = response($env, $h, $gz, $type); return if ($res->[0] > 300 || $res->[0] < 200); push @{$res->[1]}, qw(Cache-Control no-transform Content-Encoding gzip Vary Accept-Encoding); $res; } sub response ($$$;$) { my ($env, $h, $path, $type) = @_; $type //= Plack::MIME->mime_type($path) // 'application/octet-stream'; if ($path !~ /\.gz\z/i) { if (my $res = try_gzip_static($env, $h, $path, $type)) { return $res; } } my $in; if ($env->{REQUEST_METHOD} eq 'HEAD') { return r(404) unless -f $path && -r _; # in case it's a FIFO :P } else { # GET, callers should've already filtered out other methods if (!sysopen($in, $path, O_RDONLY|O_NONBLOCK)) { return r(404) if $! == ENOENT || $! == ENOTDIR; return r(403) if $! == EACCES; return r(500); } return r(404) unless -f $in; } my $size = -s _; # bare "_" reuses "struct stat" from "-f" above my $mtime = time2str((stat(_))[9]); if (my $ims = $env->{HTTP_IF_MODIFIED_SINCE}) { return [ 304, [], [] ] if $mtime eq $ims; } my $len = $size; my $code = 200; push @$h, 'Content-Type', $type; my $off = 0; if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) { ($code, $off, $len) = setup_range($env, $in, $h, $1, $2, $size); return $code if ref($code); } push @$h, 'Content-Length', $len, 'Last-Modified', $mtime; [ $code, $h, $in ? getline_response($env, $in, $off, $len, $path) : [] ] } # called by PSGI servers on each response chunk: sub getline { my ($self) = @_; # avoid buffering, by becoming the buffer! (public-inbox-httpd) if (my $tmpio = delete $self->{bypass}) { my $http = pop @$tmpio; # PublicInbox::HTTP push @{$http->{wbuf}}, $tmpio; # [ $in, $off, $len ] $http->flush_write; return; # undef, EOF } # generic PSGI runs this: my $len = $self->{len} or return; # undef, tells server we're done my $n = 8192; $n = $len if $len < $n; sysseek($self->{in}, $self->{off}, SEEK_SET) or die "sysseek ($self->{path}): $!"; my $r = sysread($self->{in}, my $buf, $n); if (defined $r && $r > 0) { # success! $self->{len} = $len - $r; $self->{off} += $r; return $buf; } my $m = defined $r ? "EOF with $len bytes left" : "read error: $!"; die "$self->{path} $m, dropping client socket\n"; } sub close {} # noop, called by PSGI server, just let everything go out-of-scope # OO interface for use as a Plack app sub new { my ($class, %opt) = @_; my $index = $opt{'index'} // [ 'index.html' ]; $index = [ $index ] if defined($index) && ref($index) ne 'ARRAY'; $index = undef if scalar(@$index) == 0; my $style = $opt{style}; if (defined $style) { $style = \$style unless ref($style); } my $docroot = $opt{docroot}; die "`docroot' not set" unless defined($docroot) && $docroot ne ''; bless { docroot => $docroot, index => $index, autoindex => $opt{autoindex}, style => $style // \$STYLE, }, $class; } # PATH_INFO is decoded, and we want the undecoded original my %path_re_cache; sub path_info_raw ($) { my ($env) = @_; my $sn = $env->{SCRIPT_NAME}; my $re = $path_re_cache{$sn} //= do { $sn = '/'.$sn unless index($sn, '/') == 0; $sn =~ s!/\z!!; qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!; }; $env->{REQUEST_URI} =~ $re ? $1 : $env->{PATH_INFO}; } sub redirect_slash ($) { my ($env) = @_; my $url = $env->{'psgi.url_scheme'} . '://'; my $host_port = $env->{HTTP_HOST} // "$env->{SERVER_NAME}:$env->{SERVER_PORT}"; $url .= $host_port . path_info_raw($env) . '/'; my $body = "Redirecting to $url\n"; [ 302, [ qw(Content-Type text/plain), 'Location', $url, 'Content-Length', length($body) ], [ $body ] ] } sub human_size ($) { my ($size) = @_; my $suffix = ''; for my $s (qw(K M G T P)) { last if $size < 1024; $size /= 1024; if ($size <= 1024) { $suffix = $s; last; } } sprintf('%lu', $size).$suffix; } # by default, this returns "index.html" if it exists for a given directory # It'll generate a directory listing, (autoindex). # May be disabled by setting autoindex => 0 sub dir_response ($$$) { my ($self, $env, $fs_path) = @_; if (my $index = $self->{'index'}) { # serve index.html or similar for my $html (@$index) { my $p = $fs_path . $html; my $res = response($env, [], $p); return $res if $res->[0] != 404; } } return r(404) unless $self->{autoindex}; opendir(my $dh, $fs_path) or do { return r(404) if ($! == ENOENT || $! == ENOTDIR); return r(403) if $! == EACCES; return r(500); }; my @entries = grep(!/\A\./, readdir($dh)); $dh = undef; my (%dirs, %other, %want_gz); my $path_info = $env->{PATH_INFO}; push @entries, '..' if $path_info ne '/'; for my $base (@entries) { my $href = ascii_html(uri_escape_utf8($base)); my $name = ascii_html($base); my @st = stat($fs_path . $base) or next; # unlikely my ($gzipped, $uncompressed, $hsize); my $entry = ''; my $mtime = $st[9]; if (-d _) { $href .= '/'; $name .= '/'; $hsize = '-'; $dirs{"$base\0$mtime"} = \$entry; } elsif (-f _) { $other{"$base\0$mtime"} = \$entry; if ($base !~ /\.gz\z/i) { $want_gz{"$base.gz\0$mtime"} = undef; } $hsize = human_size($st[7]); } else { next; } # 54 = 80 - (SP length(strftime(%Y-%m-%d %k:%M)) SP human_size) $hsize = sprintf('% 8s', $hsize); my $pad = 54 - length($name); $pad = 1 if $pad <= 0; $entry .= qq($name) . (' ' x $pad); $mtime = strftime('%Y-%m-%d %k:%M', gmtime($mtime)); $entry .= $mtime . $hsize; } # filter out '.gz' files as long as the mtime matches the # uncompressed version delete(@other{keys %want_gz}); @entries = ((map { ${$dirs{$_}} } sort keys %dirs), (map { ${$other{$_}} } sort keys %other)); my $path_info_html = ascii_html($path_info); my $h = [qw(Content-Type text/html Content-Length), undef]; my $gzf = gzf_maybe($h, $env); $gzf->zmore("Index of $path_info_html" . ${$self->{style}} . "
Index of $path_info_html

\n");
	$gzf->zmore(join("\n", @entries));
	my $out = $gzf->zflush("

\n"); $h->[3] = length($out); [ 200, $h, [ $out ] ] } sub call { # PSGI app endpoint my ($self, $env) = @_; return r(405) if $env->{REQUEST_METHOD} !~ /\A(?:GET|HEAD)\z/; my $path_info = $env->{PATH_INFO}; return r(403) if index($path_info, "\0") >= 0; my (@parts) = split(m!/+!, $path_info, -1); return r(403) if grep(/\A(?:\.\.)\z/, @parts) || $parts[0] ne ''; my $fs_path = join('/', $self->{docroot}, @parts); return dir_response($self, $env, $fs_path) if $parts[-1] eq ''; my $res = response($env, [], $fs_path); $res->[0] == 404 && -d $fs_path ? redirect_slash($env) : $res; } 1;