use Email::MIME;
use Email::MIME::ContentType qw/parse_content_type/;
use Email::Filter;
-use IPC::Open2;
+use IPC::Run;
our $VERSION = '0.0.1';
# start with the same defaults as mailman
# HTML-only, non-multipart
my $body = $simple->body;
my $ct_parsed = parse_content_type($content_type);
- dump_html($body, $ct_parsed->{attributes}->{charset});
+ dump_html(\$body, $ct_parsed->{attributes}->{charset});
replace_body($simple, $body);
return 1;
} elsif ($content_type =~ m!\bmultipart/!i) {
my ($simple, $part) = @_;
my $body = $part->body;
my $ct_parsed = parse_content_type($part->content_type);
- dump_html($body, $ct_parsed->{attributes}->{charset});
+ dump_html(\$body, $ct_parsed->{attributes}->{charset});
replace_part($simple, $part, $body, 'text/plain');
}
# Convert an HTML body to plain text by piping it through
# "lynx -stdin -stderr -dump".
# With the added (+) lines, the first argument is a reference to the
# scalar holding the HTML and the second is an optional charset which
# defaults to 'US-ASCII'.
# The referenced scalar is overwritten in place: with the lynx output
# when IPC::Run::run returns true, otherwise with a
# "public-inbox HTML conversion failed" notice (including whatever lynx
# wrote to stderr) followed by the original, unconverted body.
sub dump_html {
- my $charset = $_[1] || 'US-ASCII';
- my $cmd = "lynx -stdin -dump";
+ my ($body, $charset) = @_;
+ $charset ||= 'US-ASCII';
+ my @cmd = qw(lynx -stdin -stderr -dump);
+ my $out = "";
+ my $err = "";
# be careful about remote command injection!
# Callers take the charset from the parsed Content-Type attributes, so
# it is only handed to lynx when it consists solely of ASCII letters,
# digits and dashes; any other value is silently left out.
if ($charset =~ /\A[A-Za-z0-9\-]+\z/) {
- $cmd .= " -assume_charset=$charset";
+ push @cmd, "-assume_charset=$charset";
}
-
- my $pid = open2(my $out, my $in, $cmd);
- print $in $_[0];
- close $in;
- {
- local $/;
- $_[0] = <$out>;
+ if (IPC::Run::run(\@cmd, $body, \$out, \$err)) {
+ $$body = $out;
+ } else {
+ # give them an ugly version:
+ $$body = "public-inbox HTML conversion failed: $err\n" .
+ $$body . "\n";
}
- waitpid($pid, 0);
}
# this is to correct user errors and not expected to cover all corner cases
} else {
$rejected++;
}
+ } elsif ($part_type =~ m!\Aapplication/pgp-signature\z!i) {
+ # PGP signatures are not huge, we may keep them.
+ # They can only be valid if it's the last element,
+ # so we keep them iff the message is unmodified:
+ if ($rejected == 0 && !@html) {
+ push @keep, $part;
+ }
} else {
- # reject everything else
- #
- # Yes, we drop GPG/PGP signatures because:
- # * hardly anybody bothers to verify signatures
- # * we strip/convert HTML parts, which could invalidate
- # the signature
- # * they increase the size of messages greatly
- # (especially short ones)
- # * they do not compress well
- #
- # Instead, rely on soft verification measures:
- # * content of the message is most important
- # * we encourage Cc: all replies, so replies go to
- # the original sender
- # * Received, User-Agent, and similar headers
- # (this is also to encourage using self-hosted mail
- # servers (using 100% Free Software, of course :)
- #
- # Furthermore, identity theft is uncommon in Free/Open
- # Source, even in communities where signatures are rare.
+ # reject everything else, including non-PGP signatures
$rejected++;
}
});
mark_changed($simple);
}
-# run the file(1) command to detect mime type
-# Not using File::MMagic for now since that requires extra configuration
-# Note: we do not rewrite the message with the detected mime type
+# Check for display-able text, no messed up binaries.
+# Takes an object providing a body method and returns true only when
+# that body is non-empty, shorter than 0x10000 (65536) bytes, and made
+# up entirely of printable characters plus form feed, newline,
+# carriage return and tab.
+# Note: we can not rewrite the message with the detected mime type
sub recheck_type_ok {
my ($part) = @_;
- my $cmd = "file --mime-type -b -";
- my $pid = open2(my $out, my $in, $cmd);
- print $in $part->body;
- close $in;
- my $type = eval {
- local $/;
- <$out>;
- };
- waitpid($pid, 0);
- chomp $type;
-
- (($type =~ $MIME_TEXT_ANY) && ($type !~ $MIME_HTML))
+ my $s = $part->body;
+ # \p{XPosixPrint} (lowercase p) matches printable characters; the
+ # uppercase \P form is its negation and would accept only bodies
+ # made of non-printable characters, rejecting all normal text.
+ ((bytes::length($s) < 0x10000) &&
+ ($s =~ /\A([\p{XPosixPrint}\f\n\r\t]+)\z/))
}
1;