From: Eric Wong Date: Fri, 4 Apr 2014 00:39:41 +0000 (+0000) Subject: filter: use regexp to check multipart bodies X-Git-Tag: v1.0.0~1322 X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=62405fb60d3153fbacba4936086587860f495fce;p=public-inbox.git filter: use regexp to check multipart bodies This should be safer than running file(1), which has had its share of vulnerabilities this year (early 2014). We really only care about diffs and maybe short log files, here. --- diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm index 0b4dce87..4a348217 100644 --- a/lib/PublicInbox/Filter.pm +++ b/lib/PublicInbox/Filter.pm @@ -215,23 +215,13 @@ sub replace_body { mark_changed($simple); } -# run the file(1) command to detect mime type -# Not using File::MMagic for now since that requires extra configuration -# Note: we do not rewrite the message with the detected mime type +# Check for display-able text, no messed up binaries +# Note: we can not rewrite the message with the detected mime type sub recheck_type_ok { my ($part) = @_; - my $cmd = "file --mime-type -b -"; - my $pid = open2(my $out, my $in, $cmd); - print $in $part->body; - close $in; - my $type = eval { - local $/; - <$out>; - }; - waitpid($pid, 0); - chomp $type; - - (($type =~ $MIME_TEXT_ANY) && ($type !~ $MIME_HTML)) + my $s = $part->body; + ((bytes::length($s) < 0x10000) && + ($s =~ /\A([\P{XPosixPrint}\f\n\r\t]+)\z/)) } 1; diff --git a/t/filter.t b/t/filter.t index 0aa26a5f..ac9f1f6f 100644 --- a/t/filter.t +++ b/t/filter.t @@ -291,6 +291,8 @@ int main(void) printf("Hello world\\n"); return 0; } + +/* some folks like ^L */ EOF ), Email::MIME->create(