]> Sergey Matveev's repositories - public-inbox.git/blob - t/filter.t
filter: preserve QP when collapsing multipart
[public-inbox.git] / t / filter.t
1 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
2 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
3 use strict;
4 use warnings;
5 use Test::More;
6 use Email::MIME;
7 use PublicInbox::Filter;
8
# Tally one MIME part by its raw (still transfer-encoded) body.
#
# $bodies - hashref mapping a whitespace-trimmed raw body to the number
#           of times it has been seen; updated in place
# $part   - any object providing a body_raw method (Email::MIME parts
#           in this test)
#
# Leading and trailing whitespace is stripped before the body is used
# as a hash key.  Returns the count as it was before this call.
sub count_body_parts {
        my ($bodies, $part) = @_;
        (my $trimmed = $part->body_raw) =~ s/\A\s*//;
        $trimmed =~ s/\s*\z//;
        # post-increment of a missing entry yields 0 and stores 1
        return $bodies->{$trimmed}++;
}
17
# multipart/alternative: HTML and quoted-printable, keep the plain-text
#
# The HTML alternative is base64-encoded and the text/plain alternative
# is quoted-printable.  After filtering, only the text/plain part may
# remain, and its raw body must still be quoted-printable.
{
        my $html_body = "<html><body>hi</body></html>";
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html; charset=UTF-8',
                                encoding => 'base64',
                        },
                        body => $html_body,
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                                encoding => 'quoted-printable',
                        },
                        body => 'hi = "bye"',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [
                  From => 'a@example.com',
                  Subject => 'blah',
                  'Content-Type' => 'multipart/alternative'
                ],
                parts => $parts,
        );
        # Test::More::is() takes (got, expected, name); passing the value
        # under test first keeps failure diagnostics the right way round.
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # Re-parse the serialized message so the assertions see what
        # would actually be delivered.
        my $parsed = Email::MIME->new($email->as_string);
        is($parsed->header("Content-Type"), "text/plain",
                "content-type collapsed to text/plain");
        is(scalar $parsed->parts, 1, "HTML part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one bodies");
        # Raw body is compared, so "=" shows up as "=3D"; the trailing "="
        # is presumably the QP soft line break left after trimming.
        is($bodies{"hi =3D \"bye\"="}, 1, "QP text part unchanged");
        $parsed->walk_parts(sub {
                my ($part) = @_;
                # not named $b: that would shadow the sort() variable
                my $decoded = $part->body;
                $decoded =~ s/\s*\z//;
                is($decoded, "hi = \"bye\"", "decoded body matches");
        });
}
64
# plain-text email is passed through unchanged
# (only the filter's return status is asserted by this case)
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/plain',
                        Subject => 'this is a subject',
                ],
                body => "hello world\n",
        );
        # is(got, expected, name): value under test goes first so a
        # failure reports "got"/"expected" correctly.
        is(PublicInbox::Filter->run($s), 1, "run was a success");
}
78
# convert single-part HTML to plain-text
#
# The message object is modified in place by the filter, so the
# assertions inspect $s directly rather than a re-parsed copy.
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/html',
                        Subject => 'HTML only badness',
                ],
                body => "<html><body>bad body</body></html>\n",
        );
        # is(got, expected, name): value under test goes first
        is(PublicInbox::Filter->run($s), 1, "run was a success");
        unlike($s->as_string, qr/<html>/, "HTML removed");
        is($s->header("Content-Type"), "text/plain",
                "content-type changed");
        # only the text content must survive; surrounding whitespace
        # produced by the conversion is tolerated
        like($s->body, qr/\A\s*bad body\s*\z/, "body");
        like($s->header("X-Content-Filtered-By"),
                qr/PublicInbox::Filter/, "XCFB header added");
}
98
# multipart/alternative: HTML and plain-text, keep the plain-text
#
# Same shape as the quoted-printable case, but the text/plain
# alternative carries no explicit transfer encoding.
{
        my $html_body = "<html><body>hi</body></html>";
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html; charset=UTF-8',
                                encoding => 'base64',
                        },
                        body => $html_body,
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                        },
                        body=> 'hi',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [
                  From => 'a@example.com',
                  Subject => 'blah',
                  'Content-Type' => 'multipart/alternative'
                ],
                parts => $parts,
        );
        # is(got, expected, name): value under test goes first
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # Re-parse the serialized form to check what would be delivered.
        my $parsed = Email::MIME->new($email->as_string);
        is($parsed->header("Content-Type"), "text/plain",
                "content-type collapsed to text/plain");
        is(scalar $parsed->parts, 1, "HTML part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one bodies");
        is($bodies{"hi"}, 1, "plain text part unchanged");
}
138
# multi-part plain-text-only
#
# Nothing in this message needs filtering: both parts must survive and
# the X-Content-Filtered-By marker must not be added.
{
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => 'text/plain', },
                        body => 'hi',
                ),
                Email::MIME->create(
                        attributes => { content_type => 'text/plain', },
                        body => 'bye',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is(got, expected, name): value under test goes first
        is(PublicInbox::Filter->run($email), 1, "run was a success");
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 2, "still 2 parts");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 2, "two bodies");
        is($bodies{"bye"}, 1, "bye part exists");
        is($bodies{"hi"}, 1, "hi part exists");
        is($parsed->header("X-Content-Filtered-By"), undef,
                "XCFB header unset");
}
170
# multi-part HTML, several HTML parts
#
# One part is base64 and the other quoted-printable; each is expected
# to come out as its text content, with the filter marker header set.
{
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html',
                                encoding => 'base64',
                        },
                        body => '<html><body>b64 body</body></html>',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html',
                                encoding => 'quoted-printable',
                        },
                        body => '<html><body>qp body</body></html>',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is(got, expected, name): value under test goes first
        is(PublicInbox::Filter->run($email), 1, "run was a success");
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 2, "still 2 parts");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 2, "two body parts");
        # keys are raw bodies, so these matches also show that the
        # converted parts are no longer base64/QP-encoded HTML
        is($bodies{"b64 body"}, 1, "base64 part converted");
        is($bodies{"qp body"}, 1, "qp part converted");
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
208
# plain-text with image attachments, kill images
#
# The text part must be kept, the image/jpeg part dropped, and the
# message must still be accepted for delivery.
{
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => 'text/plain' },
                        body => 'see image',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'image/jpeg',
                                filename => 'scary.jpg',
                                encoding => 'base64',
                        },
                        body => 'bad',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is(got, expected, name): value under test goes first
        is(PublicInbox::Filter->run($email), 1, "run was a success");
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "image part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one body");
        is($bodies{'see image'}, 1, 'original body exists');
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
243
# all bad
#
# Both parts are attachments (an image and a text/plain part with an
# .exe filename).  The filter is expected to return 0 and to leave a
# single placeholder body in place of the removed parts.
{
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'image/jpeg',
                                filename => 'scary.jpg',
                                encoding => 'base64',
                        },
                        body => 'bad',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                                filename => 'scary.exe',
                                encoding => 'base64',
                        },
                        body => 'bad',
                ),
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is(got, expected, name): value under test goes first
        is(PublicInbox::Filter->run($email), 0,
                "run signaled to stop delivery");
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "bad parts removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one body");
        is($bodies{"all attachments scrubbed by PublicInbox::Filter"}, 1,
           "attachment scrubber left its mark");
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
284
# single-part message with a bogus Content-Type ('test/pain'):
# the filter must return 0 and the message must mention "scrubbed"
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'test/pain',
                        Subject => 'this is a subject',
                ],
                body => "hello world\n",
        );
        # is(got, expected, name): value under test goes first
        is(PublicInbox::Filter->run($s), 0, "run was a failure");
        like($s->as_string, qr/scrubbed/, "scrubbed message");
}
298
# Mail-Followup-To header is removed by the filter
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/plain',
                        'Mail-Followup-To' => 'c@example.com',
                        Subject => 'mfttest',
                ],
                body => "mft\n",
        );

        # Sanity-check the fixture first so "mft stripped" below cannot
        # pass merely because the header was never set.
        # is(got, expected, name): value under test goes first.
        is($s->header("Mail-Followup-To"), 'c@example.com',
                "mft set correctly");
        is(PublicInbox::Filter->run($s), 1, "run succeeded for mft");
        is($s->header("Mail-Followup-To"), undef, "mft stripped");
}
316
# multi-part with application/octet-stream
#
# Two application/octet-stream parts: one holding C source text
# (including a form feed) and one holding 4096 NUL bytes with a
# filename.  Exactly one part is expected to survive filtering.
{
        my $os = 'application/octet-stream';
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => $os },
                        body => <<EOF
#include <stdio.h>
int main(void)
{
        printf("Hello world\\n");
        return 0;
}
\f
/* some folks like ^L */
EOF
                ),
                Email::MIME->create(
                        attributes => {
                                filename => 'zero.data',
                                encoding => 'base64',
                                content_type => $os,
                        },
                        body => ("\0" x 4096),
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is(got, expected, name): value under test goes first
        is(PublicInbox::Filter->run($email), 1, "run was a success");
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "only one remaining part");
        like($parsed->header("X-Content-Filtered-By"),
                qr/PublicInbox::Filter/, "XCFB header added");
}

# no fixed plan: the number of assertions is whatever ran above
done_testing();