X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=blobdiff_plain;f=lib%2FPublicInbox%2FWwwAttach.pm;h=09c66d0239cb40a5a3942f957154b4ec54fc1097;hp=2de568041f31a17b28f76faf28e68a892b755a5e;hb=c39ed01a3a4c6c4634642eb875a16538aceacfc3;hpb=9bd675d33ad1e49bd2ebe12a1d216216e61380de diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm index 2de56804..09c66d02 100644 --- a/lib/PublicInbox/WwwAttach.pm +++ b/lib/PublicInbox/WwwAttach.pm @@ -1,43 +1,107 @@ -# Copyright (C) 2016-2019 all contributors +# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # For retrieving attachments from messages in the WWW interface package PublicInbox::WwwAttach; # internal package use strict; -use warnings; +use parent qw(PublicInbox::GzipFilter); use bytes (); # only for bytes::length -use Email::MIME::ContentType qw(parse_content_type); -use PublicInbox::MIME; -use PublicInbox::MsgIter; +use PublicInbox::EmlContentFoo qw(parse_content_type); +use PublicInbox::Eml; + +sub referer_match ($) { + my ($ctx) = @_; + my $env = $ctx->{env}; + my $referer = $env->{HTTP_REFERER} // ''; + return 1 if $referer eq ''; # no referer is always OK for wget/curl + + # prevent deep-linking from other domains on some browsers (Firefox) + # n.b.: $ctx->{-inbox}->base_url($env) with INBOX_URL won't work + # with dillo, we can only match "$url_scheme://$HTTP_HOST/" without + # path components + my $base_url = $env->{'psgi.url_scheme'} . '://' . + ($env->{HTTP_HOST} // + "$env->{SERVER_NAME}:$env->{SERVER_PORT}") . '/'; + index($referer, $base_url) == 0; +} + +sub get_attach_i { # ->each_part callback + my ($part, $depth, $idx) = @{$_[0]}; + my $ctx = $_[1]; + return if $idx ne $ctx->{idx}; # [0-9]+(?:\.[0-9]+)+ + my $res = $ctx->{res}; + $res->[0] = 200; + my $ct = $part->content_type; + $ct = parse_content_type($ct) if $ct; + + if ($ct && (($ct->{type} || '') eq 'text')) { + # display all text as text/plain: + my $cset = $ct->{attributes}->{charset}; + if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) { + $res->[1]->[1] .= qq(; charset=$cset); + } + $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res->[1], + $ctx->{env}); + $part = $ctx->zflush($part->body); + } else { # TODO: allow user to configure safe types + if (referer_match($ctx)) { + $res->[1]->[1] = 'application/octet-stream'; + $part = $part->body; + } else { + $res->[0] = 403; + $res->[1]->[1] = 'text/plain'; + $part = "Deep-linking prevented\n"; + } + } + push @{$res->[1]}, 'Content-Length', bytes::length($part); + $res->[2]->[0] = $part; +} + +sub async_eml { # for async_blob_cb + my ($ctx, $eml) = @_; + eval { $eml->each_part(\&get_attach_i, $ctx, 1) }; + if ($@) { + $ctx->{res}->[0] = 500; + warn "E: $@"; + } +} + +sub async_next { + my ($http) = @_; + my $ctx = $http->{forward} or return; # client aborted + # finally, we call the user-supplied callback + eval { $ctx->{wcb}->($ctx->{res}) }; + warn "E: $@" if $@; +} + +sub scan_attach ($) { # public-inbox-httpd only + my ($ctx) = @_; + $ctx->{env}->{'psgix.io'}->{forward} = $ctx; + $ctx->smsg_blob($ctx->{smsg}); +} # /$LISTNAME/$MESSAGE_ID/$IDX-$FILENAME sub get_attach ($$$) { my ($ctx, $idx, $fn) = @_; - my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ]; - my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res; - $mime = PublicInbox::MIME->new($mime); - msg_iter($mime, sub { - my ($part, $depth, @idx) = @{$_[0]}; - return if join('.', @idx) ne $idx; - $res->[0] = 200; - my $ct = $part->content_type; - $ct = parse_content_type($ct) if $ct; - - # discrete == type, we remain Debian wheezy-compatible - if ($ct && (($ct->{discrete} || '') eq 'text')) { - # display all text as text/plain: - my $cset = $ct->{attributes}->{charset}; - if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) { - $res->[1]->[1] .= qq(; charset=$cset); - } - } else { # TODO: allow user to configure safe types - $res->[1]->[1] = 'application/octet-stream'; + $ctx->{res} = [ 404, [ 'Content-Type' => 'text/plain' ], + [ "Not found\n" ] ]; + $ctx->{idx} = $idx; + bless $ctx, __PACKAGE__; + my $eml; + if ($ctx->{smsg} = $ctx->{-inbox}->smsg_by_mid($ctx->{mid})) { + return sub { # public-inbox-httpd-only + $ctx->{wcb} = $_[0]; + scan_attach($ctx); + } if $ctx->{env}->{'pi-httpd.async'}; + # generic PSGI: + $eml = $ctx->{-inbox}->smsg_eml($ctx->{smsg}); + } elsif (!$ctx->{-inbox}->over) { + if (my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid})) { + $eml = PublicInbox::Eml->new($bref); } - $part = $part->body; - push @{$res->[1]}, 'Content-Length', bytes::length($part); - $res->[2]->[0] = $part; - }); - $res; + } + $eml->each_part(\&get_attach_i, $ctx, 1) if $eml; + $ctx->{res} } 1;