1 # Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # This package can either be a PSGI response body for a static file
5 # OR a standalone PSGI app which returns the above PSGI response body
6 # (or an HTML directory listing).
8 # It encapsulates the "autoindex", "index", and "gzip_static"
9 # functionality of nginx.
10 package PublicInbox::WwwStatic;
12 use parent qw(Exporter);
14 use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK);
15 use POSIX qw(strftime);
16 use HTTP::Date qw(time2str);
17 use HTTP::Status qw(status_message);
18 use Errno qw(EACCES ENOTDIR ENOENT);
19 use URI::Escape qw(uri_escape_utf8);
20 use PublicInbox::Hval qw(ascii_html);
22 our @EXPORT_OK = qw(@NO_CACHE r path_info_raw);
24 our @NO_CACHE = ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT',
26 'Cache-Control', 'no-cache, max-age=0, must-revalidate');
31 *{background:#000;color:#ccc}
32 a{color:#69f;text-decoration:none}
35 @media screen AND (prefers-color-scheme:light) {
36 *{background:#fff;color:#333}
37 a{color:#00f;text-decoration:none}
47 my ($code, $msg) = @_;
48 $msg ||= status_message($code);
49 [ $code, [ qw(Content-Type text/plain), 'Content-Length', length($msg),
55 my ($env, $in, $h, $beg, $end, $size) = @_;
59 if ($end ne '') { # "bytes=-$end" => last N bytes
70 } elsif ($end eq '' || $end >= $size) {
73 } elsif ($end < $size) {
80 $len = $end - $beg + 1;
85 sysseek($in, $beg, SEEK_SET) or return r(500);
87 push @$h, qw(Accept-Ranges bytes Content-Range);
88 push @$h, "bytes $beg-$end/$size";
90 # FIXME: Plack::Middleware::Deflater bug?
91 $env->{'psgix.no-compress'} = 1;
95 push @$h, 'Content-Range', "bytes */$size";
96 return [ 416, $h, [] ];
101 # returns a PSGI arrayref response iff .gz and non-.gz mtimes match
102 sub try_gzip_static ($$$$) {
103 my ($env, $h, $path, $type) = @_;
104 return unless ($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/i;
106 return unless -f $path && defined(($mtime = (stat(_))[9]));
108 return unless -f $gz && (stat(_))[9] == $mtime;
109 my $res = response($env, $h, $gz, $type);
110 return if ($res->[0] > 300 || $res->[0] < 200);
111 push @{$res->[1]}, qw(Cache-Control no-transform Content-Encoding gzip);
115 sub response ($$$;$) {
116 my ($env, $h, $path, $type) = @_;
117 $type //= Plack::MIME->mime_type($path) // 'application/octet-stream';
118 if ($path !~ /\.gz\z/i) {
119 if (my $res = try_gzip_static($env, $h, $path, $type)) {
125 if ($env->{REQUEST_METHOD} eq 'HEAD') {
126 return r(404) unless -f $path && -r _; # in case it's a FIFO :P
127 } else { # GET, callers should've already filtered out other methods
128 if (!sysopen($in, $path, O_RDONLY|O_NONBLOCK)) {
129 return r(404) if $! == ENOENT || $! == ENOTDIR;
130 return r(403) if $! == EACCES;
133 return r(404) unless -f $in;
135 my $size = -s _; # bare "_" reuses "struct stat" from "-f" above
136 my $mtime = time2str((stat(_))[9]);
138 if (my $ims = $env->{HTTP_IF_MODIFIED_SINCE}) {
139 return [ 304, [], [] ] if $mtime eq $ims;
144 push @$h, 'Content-Type', $type;
145 if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) {
146 ($code, $len) = prepare_range($env, $in, $h, $1, $2, $size);
147 return $code if ref($code);
149 push @$h, 'Content-Length', $len, 'Last-Modified', $mtime;
150 my $body = $in ? bless {
157 [ $code, $h, $body ];
160 # called by PSGI servers on each response chunk:
163 my $len = $self->{len} or return; # undef, tells server we're done
164 my $n = delete($self->{initial_rd}) // 8192;
165 $n = $len if $len < $n;
166 my $r = sysread($self->{in}, my $buf, $n);
167 if (defined $r && $r > 0) { # success!
168 $self->{len} = $len - $r;
171 my $m = defined $r ? "EOF with $len bytes left" : "read error: $!";
172 my $env = $self->{env};
173 $env->{'psgi.errors'}->print("$self->{path} $m\n");
175 # drop the client on error
176 if (my $io = $env->{'psgix.io'}) {
177 $io->close; # this is likely PublicInbox::DS::close
178 } else { # for some PSGI servers w/o psgix.io
179 die "dropping client socket\n";
184 sub close {} # noop, called by PSGI server, just let everything go out-of-scope
186 # OO interface for use as a Plack app
188 my ($class, %opt) = @_;
189 my $index = $opt{'index'} // [ 'index.html' ];
190 $index = [ $index ] if defined($index) && ref($index) ne 'ARRAY';
191 $index = undef if scalar(@$index) == 0;
192 my $style = $opt{style};
193 if (defined $style) {
194 $style = \$style unless ref($style);
196 my $docroot = $opt{docroot};
197 die "`docroot' not set" unless defined($docroot) && $docroot ne '';
201 autoindex => $opt{autoindex},
202 style => $style // \$STYLE,
206 # PATH_INFO is decoded, and we want the undecoded original
208 sub path_info_raw ($) {
210 my $sn = $env->{SCRIPT_NAME};
211 my $re = $path_re_cache{$sn} ||= do {
212 $sn = '/'.$sn unless index($sn, '/') == 0;
214 qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!;
216 $env->{REQUEST_URI} =~ $re ? $1 : $env->{PATH_INFO};
219 sub redirect_slash ($) {
221 my $url = $env->{'psgi.url_scheme'} . '://';
222 my $host_port = $env->{HTTP_HOST} //
223 "$env->{SERVER_NAME}:$env->{SERVER_PORT}";
224 $url .= $host_port . path_info_raw($env) . '/';
225 my $body = "Redirecting to $url\n";
226 [ 302, [ qw(Content-Type text/plain), 'Location', $url,
227 'Content-Length', length($body) ], [ $body ] ]
233 for my $s (qw(K M G T P)) {
234 last if $size < 1024;
241 sprintf('%lu', $size).$suffix;
244 # by default, this returns "index.html" if it exists for a given directory
245 # It'll generate a directory listing, (autoindex).
246 # May be disabled by setting autoindex => 0
247 sub dir_response ($$$) {
248 my ($self, $env, $fs_path) = @_;
249 if (my $index = $self->{'index'}) { # serve index.html or similar
250 for my $html (@$index) {
251 my $p = $fs_path . $html;
252 my $res = response($env, [], $p);
253 return $res if $res->[0] != 404;
256 return r(404) unless $self->{autoindex};
257 opendir(my $dh, $fs_path) or do {
258 return r(404) if ($! == ENOENT || $! == ENOTDIR);
259 return r(403) if $! == EACCES;
262 my @entries = grep(!/\A\./, readdir($dh));
264 my (%dirs, %other, %want_gz);
265 my $path_info = $env->{PATH_INFO};
266 push @entries, '..' if $path_info ne '/';
267 for my $base (@entries) {
268 my $href = ascii_html(uri_escape_utf8($base));
269 my $name = ascii_html($base);
270 my @st = stat($fs_path . $base) or next; # unlikely
271 my ($gzipped, $uncompressed, $hsize);
278 $dirs{"$base\0$mtime"} = \$entry;
280 $other{"$base\0$mtime"} = \$entry;
281 if ($base !~ /\.gz\z/i) {
282 $want_gz{"$base.gz\0$mtime"} = undef;
284 $hsize = human_size($st[7]);
288 # 54 = 80 - (SP length(strftime(%Y-%m-%d %k:%M)) SP human_size)
289 $hsize = sprintf('% 8s', $hsize);
290 my $pad = 54 - length($name);
291 $pad = 1 if $pad <= 0;
292 $entry .= qq(<a\nhref="$href">$name</a>) . (' ' x $pad);
293 $mtime = strftime('%Y-%m-%d %k:%M', gmtime($mtime));
294 $entry .= $mtime . $hsize;
297 # filter out '.gz' files as long as the mtime matches the
298 # uncompressed version
299 delete(@other{keys %want_gz});
300 @entries = ((map { ${$dirs{$_}} } sort keys %dirs),
301 (map { ${$other{$_}} } sort keys %other));
303 my $path_info_html = ascii_html($path_info);
304 my $body = "<html><head><title>Index of $path_info_html</title>" .
306 "</head><body><pre>Index of $path_info_html</pre><hr><pre>\n";
307 $body .= join("\n", @entries) . "</pre><hr></body></html>\n";
308 [ 200, [ qw(Content-Type text/html
309 Content-Length), bytes::length($body) ], [ $body ] ]
312 sub call { # PSGI app endpoint
313 my ($self, $env) = @_;
314 return r(405) if $env->{REQUEST_METHOD} !~ /\A(?:GET|HEAD)\z/;
315 my $path_info = $env->{PATH_INFO};
316 return r(403) if index($path_info, "\0") >= 0;
317 my (@parts) = split(m!/+!, $path_info, -1);
318 return r(403) if grep(/\A(?:\.\.)\z/, @parts) || $parts[0] ne '';
320 my $fs_path = join('/', $self->{docroot}, @parts);
321 return dir_response($self, $env, $fs_path) if $parts[-1] eq '';
323 my $res = response($env, [], $fs_path);
324 $res->[0] == 404 && -d $fs_path ? redirect_slash($env) : $res;