1 # Copyright (C) 2016-2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # For retrieving attachments from messages in the WWW interface
5 package PublicInbox::WwwAttach; # internal package
7 use parent qw(PublicInbox::GzipFilter);
8 use bytes (); # only for bytes::length
9 use PublicInbox::EmlContentFoo qw(parse_content_type);
# Decide whether the Referer header of a request permits serving an
# attachment.  Only $ctx->{env} is consulted; the keys read from it
# (HTTP_REFERER, 'psgi.url_scheme', SERVER_NAME, SERVER_PORT) are PSGI
# environment keys.
# Evaluates to 1 when no Referer was sent; otherwise true only when the
# Referer begins with the base URL computed below.
# NOTE(review): $ctx is presumably the single argument allowed by the ($)
# prototype; its unpacking is not visible in this excerpt -- confirm.
12 sub referer_match ($) {
14 my $env = $ctx->{env};
# an absent Referer header is treated the same as an empty one
15 my $referer = $env->{HTTP_REFERER} // '';
16 return 1 if $referer eq ''; # no referer is always OK for wget/curl
# $base_url below has the form "<scheme>://<host part>/".
# NOTE(review): the opening of the parenthesized host expression is not
# visible here; going by the comment that follows, it presumably prefers
# HTTP_HOST over "SERVER_NAME:SERVER_PORT" -- confirm.
18 # prevent deep-linking from other domains on some browsers (Firefox)
19 # n.b.: $ctx->{ibx}->base_url($env) with INBOX_URL won't work
20 # with dillo, we can only match "$url_scheme://$HTTP_HOST/" without
22 my $base_url = $env->{'psgi.url_scheme'} . '://' .
24 "$env->{SERVER_NAME}:$env->{SERVER_PORT}") . '/';
# index() == 0: $base_url must occur at the very start of $referer
25 index($referer, $base_url) == 0;
# Per-part callback: fills in $ctx->{res} for the one MIME part whose index
# equals $ctx->{idx} and ignores every other part.
# $_[0] is an array ref of [ $part, $depth, $idx ]; $depth is unpacked but
# not used in the lines shown.
# $res->[1] is the response header list and $res->[1]->[1] the value of its
# first header, which get_attach initializes as 'Content-Type'.
# NOTE(review): $ctx is presumably the extra argument handed to ->each_part
# by the callers; its unpacking is not visible in this excerpt -- confirm.
28 sub get_attach_i { # ->each_part callback
29 my ($part, $depth, $idx) = @{$_[0]};
# string comparison: indices are dotted strings, not numbers
31 return if $idx ne $ctx->{idx}; # [0-9]+(?:\.[0-9]+)+
32 my $res = $ctx->{res};
# $ct stays a false value when the part declares no content type
34 my $ct = $part->content_type;
35 $ct = parse_content_type($ct) if $ct;
37 if ($ct && (($ct->{type} || '') eq 'text')) {
38 # display all text as text/plain:
39 my $cset = $ct->{attributes}->{charset};
# the charset is appended to the Content-Type header value only when it
# consists solely of ASCII letters, digits, '_' and '-'
40 if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) {
41 $res->[1]->[1] .= qq(; charset=$cset);
# NOTE(review): the remaining arguments of gz_or_noop() are not visible
# here.  After the call, $part no longer holds the part object but whatever
# zflush() returns for the part body.
43 $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res->[1],
45 $part = $ctx->zflush($part->body);
46 } else { # TODO: allow user to configure safe types
# non-text parts are labelled as opaque binary when the Referer check passes
47 if (referer_match($ctx)) {
48 $res->[1]->[1] = 'application/octet-stream';
# NOTE(review): the boundary between the branch above and the refusal below
# is not visible in this excerpt.  The refusal replaces $part with a short
# plain-text notice.
52 $res->[1]->[1] = 'text/plain';
53 $part = "Deep-linking prevented\n";
# Content-Length is counted in bytes (bytes::length) and appended to the
# header list; $part becomes the first element of the response body
56 push @{$res->[1]}, 'Content-Length', bytes::length($part);
57 $res->[2]->[0] = $part;
# Walks every part of $eml with get_attach_i, passing $ctx along, so the
# requested attachment ends up in $ctx->{res}.  The walk runs inside eval,
# so an exception raised by it does not propagate from that statement.
# NOTE(review): the unpacking of $ctx and $eml is not visible in this
# excerpt -- confirm the argument order against the caller.
60 sub async_eml { # for async_blob_cb
62 eval { $eml->each_part(\&get_attach_i, $ctx, 1) };
# first element of the response array (the status slot) becomes 500.
# NOTE(review): the condition guarding this line is not visible; presumably
# it applies only when the eval above failed -- confirm.
64 $ctx->{res}->[0] = 500;
# NOTE(review): $http is not declared in the visible lines and $ctx is
# declared afresh below, so the following statements presumably belong to a
# separate sub whose header is outside this excerpt -- confirm.
# {forward} is the slot scan_attach stores the context in.
71 my $ctx = $http->{forward} or return; # client aborted
72 # finally, we call the user-supplied callback
# eval keeps an exception thrown by the callback from propagating here
73 eval { $ctx->{wcb}->($ctx->{res}) };
# Starts retrieval of the message referenced by $ctx->{smsg}.
# NOTE(review): $ctx is presumably the single argument allowed by the ($)
# prototype; its unpacking is not visible in this excerpt -- confirm.
77 sub scan_attach ($) { # public-inbox-httpd only
# remember the context on the 'psgix.io' object of the request environment
# so it can later be found again through its {forward} slot
79 $ctx->{env}->{'psgix.io'}->{forward} = $ctx;
# NOTE(review): smsg_blob is defined outside this excerpt; the sub header
# comment on async_eml suggests the result is delivered via a callback --
# confirm.
80 $ctx->smsg_blob($ctx->{smsg});
83 # /$LISTNAME/$MESSAGE_ID/$IDX-$FILENAME
# Entry point for an attachment request.
#   $ctx - request context hash; {ibx}, {mid} and {env} are read here, and
#          {res} and {smsg} are written
#   $idx - index of the requested part
#   $fn  - file name from the URL; not used in the lines shown
# NOTE(review): the remainder of the default response, the body of the
# closure, the declaration of $eml and the value finally returned are not
# visible in this excerpt.
84 sub get_attach ($$$) {
85 my ($ctx, $idx, $fn) = @_;
# default response: status 404 with a text/plain Content-Type header
86 $ctx->{res} = [ 404, [ 'Content-Type' => 'text/plain' ],
89 bless $ctx, __PACKAGE__; # re-blesses the caller's context into this package
# preferred path: look the message up by Message-ID via smsg_by_mid
91 if ($ctx->{smsg} = $ctx->{ibx}->smsg_by_mid($ctx->{mid})) {
# when the environment has 'pi-httpd.async' set, a code ref is returned
# instead of continuing with the synchronous path below
92 return sub { # public-inbox-httpd-only
95 } if $ctx->{env}->{'pi-httpd.async'};
97 $eml = $ctx->{ibx}->smsg_eml($ctx->{smsg});
# fallback, taken only when smsg_by_mid found nothing and the inbox's
# ->over is false: fetch the raw message by Message-ID and parse it
98 } elsif (!$ctx->{ibx}->over) {
99 if (my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid})) {
100 $eml = PublicInbox::Eml->new($bref);
# without a message, {res} keeps the default set above
103 $eml->each_part(\&get_attach_i, $ctx, 1) if $eml;