-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
+# Copyright (C) 2013-2015 all contributors <meta@public-inbox.org>
# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
#
+# Used to filter incoming mail for -mda and importers
# This only exposes one function: run
# Note: the settings here are highly opinionated. Obviously, this is
# Free Software (AGPLv3), so you may change it if you host yourself.
use IPC::Run;
our $VERSION = '0.0.1';
use constant NO_HTML => '*** We only accept plain-text email, no HTML ***';
+use constant TEXT_ONLY => '*** We only accept plain-text email ***';
# start with the same defaults as mailman
# Extensions considered bad: a literal dot plus one of the listed suffixes,
# matched case-insensitively at the very end of the string (trailing
# whitespace allowed).
# NOTE(review): presumably applied to attachment filenames -- the use site
# is not visible in this fragment; confirm.
our $BAD_EXT = qr/\.(exe|bat|cmd|com|pif|scr|vbs|cpl|zip)\s*\z/i;
# HTML content types.  The "-" line matched only text/html; the "+" line
# makes the "x" optional, so text/xhtml matches as well.
-our $MIME_HTML = qr!\btext/html\b!i;
+our $MIME_HTML = qr!\btext/x?html\b!i;
# Any text/<subtype> content type whose subtype is built from letters,
# digits, "+", ".", "_" and "-" (case-insensitive).
our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i;
# this is highly opinionated delivery
my $content_type = $mime->header('Content-Type') || 'text/plain';
- # kill potentially bad/confusing headers
- # Note: ssoma already does this, but since we mangle the message,
- # we should do this before it gets to ssoma.
- # We also kill Mail-{Followup,Reply}-To and Reply-To headers due to
- # the nature of public-inbox having no real subscribers.
- foreach my $d (qw(status lines content-length
- mail-followup-to mail-reply-to reply-to)) {
- $mime->header_set($d);
- }
-
if ($content_type =~ m!\btext/plain\b!i) {
return 1; # yay, nothing to do
} elsif ($content_type =~ $MIME_HTML) {
} elsif ($content_type =~ m!\bmultipart/!i) {
return strip_multipart($mime, $content_type, $filter);
} else {
+ $filter->reject(TEXT_ONLY) if $filter;
replace_body($mime, "$content_type message scrubbed");
return 0;
}
}
}
-# this is to correct user errors and not expected to cover all corner cases
-# if users don't want to hit this, they should be sending text/plain messages
-# unfortunately, too many people send HTML mail and we'll attempt to convert
-# it to something safer, smaller and harder-to-spy-on-users-with.
+# this is to correct old archives during import.
sub strip_multipart {
my ($mime, $content_type, $filter) = @_;
if (recheck_type_ok($part)) {
push @keep, $part;
} elsif ($filter) {
- $filter->reject('no attachments')
+ $filter->reject("Bad attachment: $part_type ".
+ TEXT_ONLY);
} else {
$rejected++;
}
if ($rejected == 0 && !@html) {
push @keep, $part;
}
+ } elsif ($filter) {
+ $filter->reject("unacceptable mime-type: $part_type ".
+ TEXT_ONLY);
} else {
- $filter->reject('no attachments') if $filter;
# reject everything else, including non-PGP signatures
$rejected++;
}
# Check whether a single MIME part's body passes the size and character test.
#   $part - object providing a ->body method (its class is not visible here)
# The sub has no explicit return; its result is the value of the final
# boolean expression.
# NOTE(review): this span is a diff hunk -- the "-" lines are the check being
# removed and the "+" line is its replacement.
sub recheck_type_ok {
my ($part) = @_;
my $s = $part->body;
# Removed check: bytes::length below 0x10000 (65536) and the whole body
# matching a class built from \P{XPosixPrint} plus \f, \n, \r, \t.
# NOTE(review): capital \P negates the property, so this class looks inverted
# relative to the replacement below -- presumably what the change corrects.
- ((bytes::length($s) < 0x10000) &&
- ($s =~ /\A([\P{XPosixPrint}\f\n\r\t]+)\z/))
# Replacement check: length below 0x10000 (65536) and the whole body, from
# \A to \z, consisting only of [:print:] characters and whitespace.
+ ((length($s) < 0x10000) && ($s =~ /\A([[:print:]\s]+)\z/s));
}
1;