1 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
2 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
7 use PublicInbox::Filter;
# NOTE(review): presumably the body of count_body_parts(), which the tests
# call as count_body_parts(\%bodies, $part) -- the sub header is not visible.
# Unpack the tally hash reference and the MIME part to record.
10 my ($bodies, $part) = @_;
# The part's raw body is what gets used as the tally key.
11 my $body = $part->body_raw;
# Seed the counter with 0 when the entry is missing or false.
14 $bodies->{$body} ||= 0;
18 # multipart/alternative: HTML and quoted-printable, keep the plain-text
# The filter must report success (1); the re-parsed message must be
# text/plain with a single part whose quoted-printable body is untouched.
20 my $html_body = "<html><body>hi</body></html>";
24 content_type => 'text/html; charset=UTF-8',
31 content_type => 'text/plain',
32 encoding => 'quoted-printable',
37 my $email = Email::MIME->create(
39 From => 'a@example.com',
41 'Content-Type' => 'multipart/alternative'
45 is(PublicInbox::Filter->run($email), 1, "run was a success");
# Round-trip through as_string so the checks see the serialized message.
46 my $parsed = Email::MIME->new($email->as_string);
47 is($parsed->header("Content-Type"), "text/plain", "type is text/plain");
48 is(scalar $parsed->parts, 1, "HTML part removed");
50 $parsed->walk_parts(sub {
52 return if $part->subparts; # walk_parts already recurses
53 count_body_parts(\%bodies, $part);
55 is(scalar keys %bodies, 1, "one bodies");
# The tally key still carries the QP escapes (=3D and the trailing =).
56 is($bodies{"hi =3D \"bye\"="}, 1, "QP text part unchanged");
57 $parsed->walk_parts(sub {
61 is($b, "hi = \"bye\"", "decoded body matches");
65 # plain-text email is passed through unchanged
# The visible assertion checks the filter's return value (1 on success).
67 my $s = Email::MIME->create(
69 From => 'a@example.com',
70 To => 'b@example.com',
71 'Content-Type' => 'text/plain',
72 Subject => 'this is a subject',
74 body => "hello world\n",
76 is(PublicInbox::Filter->run($s), 1, "run was a success");
79 # convert single-part HTML to plain-text
# After a successful run the message object itself is inspected: no <html>
# markup, a text/plain Content-Type, the bare text as the body, and an
# X-Content-Filtered-By header naming the filter.
81 my $s = Email::MIME->create(
83 From => 'a@example.com',
84 To => 'b@example.com',
85 'Content-Type' => 'text/html',
86 Subject => 'HTML only badness',
88 body => "<html><body>bad body</body></html>\n",
90 is(PublicInbox::Filter->run($s), 1, "run was a success");
91 unlike($s->as_string, qr/<html>/, "HTML removed");
92 is($s->header("Content-Type"), "text/plain",
93 "content-type changed");
94 like($s->body, qr/\A\s*bad body\s*\z/, "body");
95 like($s->header("X-Content-Filtered-By"),
96 qr/PublicInbox::Filter/, "XCFB header added");
99 # multipart/alternative: HTML and plain-text, keep the plain-text
# The HTML alternative is base64-encoded; after a successful run the
# re-parsed message must be text/plain with only the "hi" body left.
101 my $html_body = "<html><body>hi</body></html>";
105 content_type => 'text/html; charset=UTF-8',
106 encoding => 'base64',
112 content_type => 'text/plain',
117 my $email = Email::MIME->create(
119 From => 'a@example.com',
121 'Content-Type' => 'multipart/alternative'
125 is(PublicInbox::Filter->run($email), 1, "run was a success");
# Round-trip through as_string so the checks see the serialized message.
126 my $parsed = Email::MIME->new($email->as_string);
127 is($parsed->header("Content-Type"), "text/plain", "type is text/plain");
128 is(scalar $parsed->parts, 1, "HTML part removed");
130 $parsed->walk_parts(sub {
132 return if $part->subparts; # walk_parts already recurses
133 count_body_parts(\%bodies, $part);
135 is(scalar keys %bodies, 1, "one bodies");
136 is($bodies{"hi"}, 1, "plain text part unchanged");
139 # multi-part plain-text-only
# Both text/plain parts must survive a successful run, and no
# X-Content-Filtered-By header is expected on the result.
143 attributes => { content_type => 'text/plain', },
147 attributes => { content_type => 'text/plain', },
151 my $email = Email::MIME->create(
152 header_str => [ From => 'a@example.com', Subject => 'blah' ],
155 is(PublicInbox::Filter->run($email), 1, "run was a success");
# Round-trip through as_string so the checks see the serialized message.
156 my $parsed = Email::MIME->new($email->as_string);
157 is(scalar $parsed->parts, 2, "still 2 parts");
159 $parsed->walk_parts(sub {
161 return if $part->subparts; # walk_parts already recurses
162 count_body_parts(\%bodies, $part);
164 is(scalar keys %bodies, 2, "two bodies");
165 is($bodies{"bye"}, 1, "bye part exists");
166 is($bodies{"hi"}, 1, "hi part exists");
167 is($parsed->header("X-Content-Filtered-By"), undef,
168 "XCFB header unset");
171 # multi-part HTML, several HTML parts
# One part is base64 and the other quoted-printable; after a successful run
# both must still be present with their markup stripped, and the
# X-Content-Filtered-By header must name the filter.
176 content_type => 'text/html',
177 encoding => 'base64',
179 body => '<html><body>b64 body</body></html>',
183 content_type => 'text/html',
184 encoding => 'quoted-printable',
186 body => '<html><body>qp body</body></html>',
189 my $email = Email::MIME->create(
190 header_str => [ From => 'a@example.com', Subject => 'blah' ],
193 is(PublicInbox::Filter->run($email), 1, "run was a success");
# Round-trip through as_string so the checks see the serialized message.
194 my $parsed = Email::MIME->new($email->as_string);
195 is(scalar $parsed->parts, 2, "still 2 parts");
197 $parsed->walk_parts(sub {
199 return if $part->subparts; # walk_parts already recurses
200 count_body_parts(\%bodies, $part);
202 is(scalar keys %bodies, 2, "two body parts");
203 is($bodies{"b64 body"}, 1, "base64 part converted");
204 is($bodies{"qp body"}, 1, "qp part converted");
205 like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
206 "XCFB header added");
209 # plain-text with image attachments, kill images
# The run still counts as a success; only the 'see image' text body is
# expected to remain, and the X-Content-Filtered-By header must be set.
213 attributes => { content_type => 'text/plain' },
218 content_type => 'image/jpeg',
219 filename => 'scary.jpg',
220 encoding => 'base64',
225 my $email = Email::MIME->create(
226 header_str => [ From => 'a@example.com', Subject => 'blah' ],
229 is(PublicInbox::Filter->run($email), 1, "run was a success");
# Round-trip through as_string so the checks see the serialized message.
230 my $parsed = Email::MIME->new($email->as_string);
231 is(scalar $parsed->parts, 1, "image part removed");
233 $parsed->walk_parts(sub {
235 return if $part->subparts; # walk_parts already recurses
236 count_body_parts(\%bodies, $part);
238 is(scalar keys %bodies, 1, "one body");
239 is($bodies{'see image'}, 1, 'original body exists');
240 like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
241 "XCFB header added");
# NOTE(review): fixture for the case where every visible part is rejected
# (a JPEG and a base64 part named scary.exe). run() must return 0 and leave
# a single placeholder body plus the X-Content-Filtered-By header.
249 content_type => 'image/jpeg',
250 filename => 'scary.jpg',
251 encoding => 'base64',
257 content_type => 'text/plain',
258 filename => 'scary.exe',
259 encoding => 'base64',
264 my $email = Email::MIME->create(
265 header_str => [ From => 'a@example.com', Subject => 'blah' ],
268 is(PublicInbox::Filter->run($email), 0,
269 "run signaled to stop delivery");
# Round-trip through as_string so the checks see the serialized message.
270 my $parsed = Email::MIME->new($email->as_string);
271 is(scalar $parsed->parts, 1, "bad parts removed");
273 $parsed->walk_parts(sub {
275 return if $part->subparts; # walk_parts already recurses
276 count_body_parts(\%bodies, $part);
278 is(scalar keys %bodies, 1, "one body");
279 is($bodies{"all attachments scrubbed by PublicInbox::Filter"}, 1,
280 "attachment scrubber left its mark");
281 like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
282 "XCFB header added");
# A message with the bogus Content-Type 'test/pain' must make run() return 0
# and leave the word "scrubbed" somewhere in the serialized message.
286 my $s = Email::MIME->create(
288 From => 'a@example.com',
289 To => 'b@example.com',
290 'Content-Type' => 'test/pain',
291 Subject => 'this is a subject',
293 body => "hello world\n",
295 is(PublicInbox::Filter->run($s), 0, "run was a failure");
296 like($s->as_string, qr/scrubbed/, "scrubbed message");
# The Mail-Followup-To header is present before the filter runs and must be
# gone after a successful run.
300 my $s = Email::MIME->create(
302 From => 'a@example.com',
303 To => 'b@example.com',
304 'Content-Type' => 'text/plain',
305 'Mail-Followup-To' => 'c@example.com',
306 Subject => 'mfttest',
311 is($s->header("Mail-Followup-To"), 'c@example.com',
312 "mft set correctly");
313 is(PublicInbox::Filter->run($s), 1, "run succeeded for mft");
314 is($s->header("Mail-Followup-To"), undef, "mft stripped");
317 # multi-part with application/octet-stream
# The fixture has an application/octet-stream part and a base64 'zero.data'
# part of 4096 NUL bytes; after a successful run one part must remain and
# the X-Content-Filtered-By header must name the filter.
319 my $os = 'application/octet-stream';
322 attributes => { content_type => $os },
327 printf("Hello world\\n");
331 /* some folks like ^L */
336 filename => 'zero.data',
337 encoding => 'base64',
340 body => ("\0" x 4096),
343 my $email = Email::MIME->create(
344 header_str => [ From => 'a@example.com', Subject => 'blah' ],
347 is(PublicInbox::Filter->run($email), 1, "run was a success");
# Round-trip through as_string so the checks see the serialized message.
348 my $parsed = Email::MIME->new($email->as_string);
349 is(scalar $parsed->parts, 1, "only one remaining part");
350 like($parsed->header("X-Content-Filtered-By"),
351 qr/PublicInbox::Filter/, "XCFB header added");