]> Sergey Matveev's repositories - public-inbox.git/blob - t/filter.t
huge refactor of encoding handling
[public-inbox.git] / t / filter.t
1 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
2 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
3 use strict;
4 use warnings;
5 use Test::More;
6 use Email::MIME;
7 use PublicInbox::Filter;
8
# Tally one MIME part's body into a histogram keyed by body text.
#
#   $seen      - hash ref mapping whitespace-trimmed body text => times seen
#   $mime_part - any object responding to ->body_raw
#
# Leading and trailing whitespace is stripped from the raw body before it is
# used as the hash key, so parts differing only in surrounding whitespace
# share a counter.  Returns the counter's value before the increment.
sub count_body_parts {
        my ($seen, $mime_part) = @_;
        my $text = $mime_part->body_raw;
        for ($text) {
                s/\A\s*//;
                s/\s*\z//;
        }
        # normalize a missing/false counter to numeric zero before bumping it
        $seen->{$text} = 0 unless $seen->{$text};
        return $seen->{$text}++;
}
17
# plain-text email: the filter accepts it (run returns 1)
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/plain',
                        Subject => 'this is a subject',
                ],
                body => "hello world\n",
        );
        # is() takes ($got, $expected, $name); passing the observed value
        # first keeps the "got"/"expected" labels right in failure output.
        is(PublicInbox::Filter->run($s), 1, "run was a success");
}
31
# convert single-part HTML to plain-text
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/html',
                        Subject => 'HTML only badness',
                ],
                body => "<html><body>bad body</body></html>\n",
        );
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($s), 1, "run was a success");
        unlike($s->as_string, qr/<html>/, "HTML removed");
        is($s->header("Content-Type"), "text/plain",
                "content-type changed");
        # only the text inside the markup should remain, modulo whitespace
        like($s->body, qr/\A\s*bad body\s*\z/, "body");
        like($s->header("X-Content-Filtered-By"),
                qr/PublicInbox::Filter/, "XCFB header added");
}
51
# multipart/alternative: HTML and plain-text, keep the plain-text
{
        my $html_body = "<html><body>hi</body></html>";
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html; charset=UTF-8',
                                encoding => 'base64',
                        },
                        body => $html_body,
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                        },
                        body => 'hi',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [
                        From => 'a@example.com',
                        Subject => 'blah',
                        'Content-Type' => 'multipart/alternative'
                ],
                parts => $parts,
        );
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # re-parse the serialized message so the checks see what would
        # actually be written out, not the in-memory object
        my $parsed = Email::MIME->new($email->as_string);
        is($parsed->header("Content-Type"), "text/plain",
                "content-type is text/plain after filtering");
        is(scalar $parsed->parts, 1, "HTML part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one bodies");
        is($bodies{"hi"}, 1, "plain text part unchanged");
}
91
# multi-part plain-text-only: both parts survive and no XCFB header is added
{
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => 'text/plain', },
                        body => 'hi',
                ),
                Email::MIME->create(
                        attributes => { content_type => 'text/plain', },
                        body => 'bye',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # inspect the serialized form rather than the in-memory object
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 2, "still 2 parts");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 2, "two bodies");
        is($bodies{"bye"}, 1, "bye part exists");
        is($bodies{"hi"}, 1, "hi part exists");
        is($parsed->header("X-Content-Filtered-By"), undef,
                "XCFB header unset");
}
123
# multi-part HTML, several HTML parts: each is converted to its text content
{
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html',
                                encoding => 'base64',
                        },
                        body => '<html><body>b64 body</body></html>',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html',
                                encoding => 'quoted-printable',
                        },
                        body => '<html><body>qp body</body></html>',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # inspect the serialized form rather than the in-memory object
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 2, "still 2 parts");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 2, "two body parts");
        is($bodies{"b64 body"}, 1, "base64 part converted");
        is($bodies{"qp body"}, 1, "qp part converted");
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
161
# plain-text with image attachments, kill images
{
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => 'text/plain' },
                        body => 'see image',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'image/jpeg',
                                filename => 'scary.jpg',
                                encoding => 'base64',
                        },
                        body => 'bad',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # inspect the serialized form rather than the in-memory object
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "image part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one body");
        is($bodies{'see image'}, 1, 'original body exists');
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
196
# all bad: every part is rejected, run returns 0 and a placeholder body remains
{
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'image/jpeg',
                                filename => 'scary.jpg',
                                encoding => 'base64',
                        },
                        body => 'bad',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                                filename => 'scary.exe',
                                encoding => 'base64',
                        },
                        body => 'bad',
                ),
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($email), 0,
                "run signaled to stop delivery");

        # inspect the serialized form rather than the in-memory object
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "bad parts removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one body");
        is($bodies{"all attachments scrubbed by PublicInbox::Filter"}, 1,
           "attachment scrubber left its mark");
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
237
# single-part message with an unrecognized Content-Type ('test/pain'):
# run returns 0 and the serialized message mentions "scrubbed"
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'test/pain',
                        Subject => 'this is a subject',
                ],
                body => "hello world\n",
        );
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($s), 0, "run was a failure");
        like($s->as_string, qr/scrubbed/, "scrubbed message");
}
251
# Mail-Followup-To header is present before filtering and gone afterwards
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/plain',
                        'Mail-Followup-To' => 'c@example.com',
                        Subject => 'mfttest',
                ],
                body => "mft\n",
        );

        # sanity-check the fixture first so the "stripped" assertion below
        # cannot pass merely because the header was never set
        is($s->header("Mail-Followup-To"), 'c@example.com',
                "mft set correctly");
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($s), 1, "run succeeded for mft");
        is($s->header("Mail-Followup-To"), undef, "mft stripped");
}
269
# multi-part with application/octet-stream: one textual part and one part of
# NUL bytes go in, only one part remains after filtering
{
        my $os = 'application/octet-stream';
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => $os },
                        # interpolating heredoc: "\\n" yields a literal
                        # backslash-n in the C source, "\f" a form feed
                        body => <<EOF
#include <stdio.h>
int main(void)
{
        printf("Hello world\\n");
        return 0;
}
\f
/* some folks like ^L */
EOF
                ),
                Email::MIME->create(
                        attributes => {
                                filename => 'zero.data',
                                encoding => 'base64',
                                content_type => $os,
                        },
                        body => ("\0" x 4096),
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # observed value first, expected value second (Test::More convention)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # inspect the serialized form rather than the in-memory object
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "only one remaining part");
        like($parsed->header("X-Content-Filtered-By"),
                qr/PublicInbox::Filter/, "XCFB header added");
}
306
# no fixed plan is declared up front; emit the plan now that all tests ran
done_testing;