1 # Copyright (C) 2013-2015 all contributors <meta@public-inbox.org>
2 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
7 use PublicInbox::Filter;
10 my ($bodies, $part) = @_;
11 my $body = $part->body_raw;
14 $bodies->{$body} ||= 0;
18 # multipart/alternative: HTML and quoted-printable, keep the plain-text
20 my $html_body = "<html><body>hi</body></html>";
24 content_type => 'text/html; charset=UTF-8',
31 content_type => 'text/plain',
32 encoding => 'quoted-printable',
37 my $email = Email::MIME->create(
39 From => 'a@example.com',
41 'Content-Type' => 'multipart/alternative'
# The filter is expected to return 1 for this message.  The message is
# then serialized and re-parsed so the remaining assertions inspect the
# round-tripped form.
# Test::More's is() takes ($got, $expected, $name): the value under test
# goes first so failure diagnostics label "got" and "expected" correctly.
is(PublicInbox::Filter->run($email), 1, "run was a success");
my $parsed = Email::MIME->new($email->as_string);
is($parsed->header("Content-Type"), "text/plain",
    "content-type is text/plain");
is(scalar $parsed->parts, 1, "HTML part removed");
50 $parsed->walk_parts(sub {
52 return if $part->subparts; # walk_parts already recurses
53 count_body_parts(\%bodies, $part);
# %bodies is keyed by each leaf part's raw body, so the surviving
# plain-text part is looked up by its still quoted-printable-encoded text.
is(scalar keys %bodies, 1, "one body");
is($bodies{"hi =3D \"bye\"="}, 1, "QP text part unchanged");
57 $parsed->walk_parts(sub {
61 is($b, "hi = \"bye\"", "decoded body matches");
65 # plain-text email is passed through unchanged
67 my $s = Email::MIME->create(
69 From => 'a@example.com',
70 To => 'b@example.com',
71 'Content-Type' => 'text/plain',
72 Subject => 'this is a subject',
74 body => "hello world\n",
# The filter is expected to return 1 for a plain-text message.
# Value under test first, expected value second, per Test::More's is().
is(PublicInbox::Filter->run($s), 1, "run was a success");
79 # convert single-part HTML to plain-text
81 my $s = Email::MIME->create(
83 From => 'a@example.com',
84 To => 'b@example.com',
85 'Content-Type' => 'text/html',
86 Subject => 'HTML only badness',
88 body => "<html><body>bad body\r\n</body></html>\n",
# A single-part text/html message must be accepted (run returns 1) and
# rewritten in place: no HTML markup left, Content-Type switched to
# text/plain, carriage returns stripped from the body, and the
# X-Content-Filtered-By header naming PublicInbox::Filter.
# is() takes ($got, $expected, $name), so the value under test goes first.
is(PublicInbox::Filter->run($s), 1, "run was a success");
unlike($s->as_string, qr/<html>/, "HTML removed");
is($s->header("Content-Type"), "text/plain",
    "content-type changed");
like($s->body, qr/\A\s*bad body\s*\z/, "body");
unlike($s->body, qr/\r/, "body has no cr");
like($s->header("X-Content-Filtered-By"),
    qr/PublicInbox::Filter/, "XCFB header added");
100 # multipart/alternative: HTML and plain-text, keep the plain-text
102 my $html_body = "<html><body>hi</body></html>";
106 content_type => 'text/html; charset=UTF-8',
107 encoding => 'base64',
113 content_type => 'text/plain',
118 my $email = Email::MIME->create(
120 From => 'a@example.com',
122 'Content-Type' => 'multipart/alternative'
# The filter is expected to return 1 for this message.  The message is
# then serialized and re-parsed so the remaining assertions inspect the
# round-tripped form.
# Test::More's is() takes ($got, $expected, $name): the value under test
# goes first so failure diagnostics label "got" and "expected" correctly.
is(PublicInbox::Filter->run($email), 1, "run was a success");
my $parsed = Email::MIME->new($email->as_string);
is($parsed->header("Content-Type"), "text/plain",
    "content-type is text/plain");
is(scalar $parsed->parts, 1, "HTML part removed");
131 $parsed->walk_parts(sub {
133 return if $part->subparts; # walk_parts already recurses
134 count_body_parts(\%bodies, $part);
# %bodies is keyed by each leaf part's raw body; only the plain-text
# "hi" part should be left after filtering.
is(scalar keys %bodies, 1, "one body");
is($bodies{"hi"}, 1, "plain text part unchanged");
140 # multi-part plain-text-only
144 attributes => { content_type => 'text/plain', },
148 attributes => { content_type => 'text/plain', },
152 my $email = Email::MIME->create(
153 header_str => [ From => 'a@example.com', Subject => 'blah' ],
# The filter is expected to return 1 and leave both text/plain parts in
# place.  The message is re-parsed from its serialized form before the
# parts are counted.
# is() takes ($got, $expected, $name), so the value under test goes first.
is(PublicInbox::Filter->run($email), 1, "run was a success");
my $parsed = Email::MIME->new($email->as_string);
is(scalar $parsed->parts, 2, "still 2 parts");
160 $parsed->walk_parts(sub {
162 return if $part->subparts; # walk_parts already recurses
163 count_body_parts(\%bodies, $part);
165 is(scalar keys %bodies, 2, "two bodies");
166 is($bodies{"bye"}, 1, "bye part exists");
167 is($bodies{"hi"}, 1, "hi part exists");
168 is($parsed->header("X-Content-Filtered-By"), undef,
169 "XCFB header unset");
172 # multi-part HTML, several HTML parts
177 content_type => 'text/html',
178 encoding => 'base64',
180 body => '<html><body>b64 body</body></html>',
184 content_type => 'text/html',
185 encoding => 'quoted-printable',
187 body => '<html><body>qp body</body></html>',
190 my $email = Email::MIME->create(
191 header_str => [ From => 'a@example.com', Subject => 'blah' ],
# The filter is expected to return 1 and keep both parts of this
# message.  The message is re-parsed from its serialized form before
# the parts are counted.
# is() takes ($got, $expected, $name), so the value under test goes first.
is(PublicInbox::Filter->run($email), 1, "run was a success");
my $parsed = Email::MIME->new($email->as_string);
is(scalar $parsed->parts, 2, "still 2 parts");
198 $parsed->walk_parts(sub {
200 return if $part->subparts; # walk_parts already recurses
201 count_body_parts(\%bodies, $part);
203 is(scalar keys %bodies, 2, "two body parts");
204 is($bodies{"b64 body"}, 1, "base64 part converted");
205 is($bodies{"qp body"}, 1, "qp part converted");
206 like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
207 "XCFB header added");
210 # plain-text with image attachments, kill images
214 attributes => { content_type => 'text/plain' },
219 content_type => 'image/jpeg',
220 filename => 'scary.jpg',
221 encoding => 'base64',
226 my $email = Email::MIME->create(
227 header_str => [ From => 'a@example.com', Subject => 'blah' ],
# The filter is expected to return 1 and leave a single part once the
# image attachment is removed.  The message is re-parsed from its
# serialized form before the parts are counted.
# is() takes ($got, $expected, $name), so the value under test goes first.
is(PublicInbox::Filter->run($email), 1, "run was a success");
my $parsed = Email::MIME->new($email->as_string);
is(scalar $parsed->parts, 1, "image part removed");
234 $parsed->walk_parts(sub {
236 return if $part->subparts; # walk_parts already recurses
237 count_body_parts(\%bodies, $part);
239 is(scalar keys %bodies, 1, "one body");
240 is($bodies{'see image'}, 1, 'original body exists');
241 like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
242 "XCFB header added");
250 content_type => 'image/jpeg',
251 filename => 'scary.jpg',
252 encoding => 'base64',
258 content_type => 'text/plain',
259 filename => 'scary.exe',
260 encoding => 'base64',
265 my $email = Email::MIME->create(
266 header_str => [ From => 'a@example.com', Subject => 'blah' ],
# For this message the filter is expected to return 0, which this test
# treats as the signal to stop delivery, and to leave a single part.
# is() takes ($got, $expected, $name), so the value under test goes first.
is(PublicInbox::Filter->run($email), 0,
    "run signaled to stop delivery");
my $parsed = Email::MIME->new($email->as_string);
is(scalar $parsed->parts, 1, "bad parts removed");
274 $parsed->walk_parts(sub {
276 return if $part->subparts; # walk_parts already recurses
277 count_body_parts(\%bodies, $part);
279 is(scalar keys %bodies, 1, "one body");
280 is($bodies{"all attachments scrubbed by PublicInbox::Filter"}, 1,
281 "attachment scrubber left its mark");
282 like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
283 "XCFB header added");
287 my $s = Email::MIME->create(
289 From => 'a@example.com',
290 To => 'b@example.com',
291 'Content-Type' => 'test/pain',
292 Subject => 'this is a subject',
294 body => "hello world\n",
# For this message the filter is expected to return 0, and the
# serialized result must mention "scrubbed".
# is() takes ($got, $expected, $name), so the value under test goes first.
is(PublicInbox::Filter->run($s), 0, "run was a failure");
like($s->as_string, qr/scrubbed/, "scrubbed message");
301 my $s = Email::MIME->create(
303 From => 'a@example.com',
304 To => 'b@example.com',
305 'Content-Type' => 'text/plain',
306 'Mail-Followup-To' => 'c@example.com',
307 Subject => 'mfttest',
# Confirm the fixture really carries Mail-Followup-To, then check that a
# successful filter run removes the header.
# is() takes ($got, $expected, $name), so the value under test goes
# first; its ($$;$) prototype keeps the header() calls in scalar context.
is($s->header("Mail-Followup-To"), 'c@example.com',
    "mft set correctly");
is(PublicInbox::Filter->run($s), 1, "run succeeded for mft");
is($s->header("Mail-Followup-To"), undef, "mft stripped");
318 # multi-part with application/octet-stream
320 my $os = 'application/octet-stream';
323 attributes => { content_type => $os },
328 printf("Hello world\\n");
332 /* some folks like ^L */
337 filename => 'zero.data',
338 encoding => 'base64',
341 body => ("\0" x 4096),
344 my $email = Email::MIME->create(
345 header_str => [ From => 'a@example.com', Subject => 'blah' ],
# The filter is expected to return 1, leave exactly one part in the
# re-parsed message, and add the X-Content-Filtered-By header.
# is() takes ($got, $expected, $name), so the value under test goes first.
is(PublicInbox::Filter->run($email), 1, "run was a success");
my $parsed = Email::MIME->new($email->as_string);
is(scalar $parsed->parts, 1, "only one remaining part");
like($parsed->header("X-Content-Filtered-By"),
    qr/PublicInbox::Filter/, "XCFB header added");