X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FFilter.pm;h=5a023cf7032b0e27573932238c6349b1464f5004;hb=f76f265a851944b5dedcc3be5f3b5224b6ebda89;hp=a1587682a3f3a8d4dd866db535b15a0623382252;hpb=e022d3377fd2c50fd9931bf96394728958a90bf3;p=public-inbox.git diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm index a1587682..5a023cf7 100644 --- a/lib/PublicInbox/Filter.pm +++ b/lib/PublicInbox/Filter.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2013, Eric Wong and all contributors +# Copyright (C) 2013-2015 all contributors # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) # # This only exposes one function: run @@ -12,16 +12,17 @@ use Email::MIME::ContentType qw/parse_content_type/; use Email::Filter; use IPC::Run; our $VERSION = '0.0.1'; +use constant NO_HTML => '*** We only accept plain-text email, no HTML ***'; # start with the same defaults as mailman -our $BAD_EXT = qr/\.(?:exe|bat|cmd|com|pif|scr|vbs|cpl)\z/i; +our $BAD_EXT = qr/\.(exe|bat|cmd|com|pif|scr|vbs|cpl|zip)\s*\z/i; our $MIME_HTML = qr!\btext/html\b!i; our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i; # this is highly opinionated delivery # returns 0 only if there is nothing to deliver sub run { - my ($class, $mime) = @_; + my ($class, $mime, $filter) = @_; my $content_type = $mime->header('Content-Type') || 'text/plain'; @@ -38,6 +39,7 @@ sub run { if ($content_type =~ m!\btext/plain\b!i) { return 1; # yay, nothing to do } elsif ($content_type =~ $MIME_HTML) { + $filter->reject(NO_HTML) if $filter; # HTML-only, non-multipart my $body = $mime->body; my $ct_parsed = parse_content_type($content_type); @@ -45,7 +47,7 @@ sub run { replace_body($mime, $body); return 1; } elsif ($content_type =~ m!\bmultipart/!i) { - return strip_multipart($mime, $content_type); + return strip_multipart($mime, $content_type, $filter); } else { replace_body($mime, "$content_type message scrubbed"); return 0; @@ -97,6 +99,7 @@ sub dump_html { push @cmd, "-assume_charset=$charset"; } if (IPC::Run::run(\@cmd, $body, \$out, \$err)) { + $out =~ s/\r\n/\n/sg; $$body = $out; } else { # give them an ugly version: @@ -108,9 +111,9 @@ sub dump_html { # this is to correct user errors and not expected to cover all corner cases # if users don't want to hit this, they should be sending text/plain messages # unfortunately, too many people send HTML mail and we'll attempt to convert -# it to something safer, smaller and harder-to-track. +# it to something safer, smaller and harder-to-spy-on-users-with. sub strip_multipart { - my ($mime, $content_type) = @_; + my ($mime, $content_type, $filter) = @_; my (@html, @keep); my $rejected = 0; @@ -124,14 +127,16 @@ sub strip_multipart { # some extensions are just bad, reject them outright my $fn = $part->filename; if (defined($fn) && $fn =~ $BAD_EXT) { + $filter->reject("Bad file type: $1") if $filter; $rejected++; return; } - my $part_type = $part->content_type; + my $part_type = $part->content_type || ''; if ($part_type =~ m!\btext/plain\b!i) { push @keep, $part; } elsif ($part_type =~ $MIME_HTML) { + $filter->reject(NO_HTML) if $filter; push @html, $part; } elsif ($part_type =~ $MIME_TEXT_ANY) { # Give other text attachments the benefit of the doubt, @@ -139,16 +144,19 @@ sub strip_multipart { # help with. push @keep, $part; - } elsif ($part_type =~ m!\Aapplication/octet-stream\z!i) { + } elsif ($part_type eq '' || + $part_type =~ m!\bapplication/octet-stream\b!i) { # unfortunately, some mailers don't set correct types, # let messages of unknown type through but do not # change the sender-specified type if (recheck_type_ok($part)) { push @keep, $part; + } elsif ($filter) { + $filter->reject('no attachments') } else { $rejected++; } - } elsif ($part_type =~ m!\Aapplication/pgp-signature\z!i) { + } elsif ($part_type =~ m!\bapplication/pgp-signature\b!i) { # PGP signatures are not huge, we may keep them. # They can only be valid if it's the last element, # so we keep them iff the message is unmodified: @@ -156,6 +164,7 @@ sub strip_multipart { push @keep, $part; } } else { + $filter->reject('no attachments') if $filter; # reject everything else, including non-PGP signatures $rejected++; } @@ -201,6 +210,10 @@ sub collapse { my ($mime, $part) = @_; $mime->header_set('Content-Type', $part->content_type); $mime->body_set($part->body_raw); + my $cte = $part->header('Content-Transfer-Encoding'); + if (defined($cte) && $cte ne '') { + $mime->header_set('Content-Transfer-Encoding', $cte); + } mark_changed($mime); return 1; }