]> Sergey Matveev's repositories - public-inbox.git/blob - t/filter.t
emergency: implement new emergency Maildir delivery
[public-inbox.git] / t / filter.t
1 # Copyright (C) 2013-2015 all contributors <meta@public-inbox.org>
2 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
3 use strict;
4 use warnings;
5 use Test::More;
6 use Email::MIME;
7 use PublicInbox::Filter;
8
# Tally one MIME part's raw body into a caller-supplied hash.
#
#   $bodies - hashref mapping a whitespace-trimmed raw body to the number
#             of times it has been seen
#   $part   - any object providing a body_raw() method
#
# The raw (still transfer-encoded) body is stripped of leading and
# trailing whitespace before being used as the hash key.
sub count_body_parts {
        my ($bodies, $part) = @_;
        my $key = $part->body_raw;
        for ($key) {
                s/\A\s*//;
                s/\s*\z//;
        }
        # post-increment on a missing key starts the tally at zero
        $bodies->{$key}++;
}
17
# multipart/alternative: HTML and quoted-printable, keep the plain-text
#
# Builds a two-part alternative message (base64 HTML plus quoted-printable
# text/plain), runs it through the filter and checks that only the
# plain-text part is left, with its raw quoted-printable body intact.
{
        my $html_body = "<html><body>hi</body></html>";
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html; charset=UTF-8',
                                encoding => 'base64',
                        },
                        body => $html_body,
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                                encoding => 'quoted-printable',
                        },
                        body => 'hi = "bye"',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [
                  From => 'a@example.com',
                  Subject => 'blah',
                  'Content-Type' => 'multipart/alternative'
                ],
                parts => $parts,
        );
        # is() takes ($got, $expected, $name); keeping that order makes
        # the "got"/"expected" lines of a failure report truthful
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # re-parse the serialized message so we test what would be delivered
        my $parsed = Email::MIME->new($email->as_string);
        is($parsed->header("Content-Type"), "text/plain",
                "content-type is text/plain");
        is(scalar $parsed->parts, 1, "HTML part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one bodies");
        # body_raw is still transfer-encoded, hence the "=3D" and soft break
        is($bodies{"hi =3D \"bye\"="}, 1, "QP text part unchanged");
        $parsed->walk_parts(sub {
                my ($part) = @_;
                # deliberately not "my $b": that name belongs to sort()
                my $decoded = $part->body;
                $decoded =~ s/\s*\z//;
                is($decoded, "hi = \"bye\"", "decoded body matches");
        });
}
64
# plain-text email is passed through unchanged
# (only the return value of run() is asserted here; the message itself
# is not compared before/after)
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/plain',
                        Subject => 'this is a subject',
                ],
                body => "hello world\n",
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($s), 1, "run was a success");
}
78
# convert single-part HTML to plain-text
#
# A lone text/html message must come out of the filter as text/plain,
# with the markup and carriage returns gone and the
# X-Content-Filtered-By header recording that the filter touched it.
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/html',
                        Subject => 'HTML only badness',
                ],
                body => "<html><body>bad body\r\n</body></html>\n",
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($s), 1, "run was a success");
        unlike($s->as_string, qr/<html>/, "HTML removed");
        is($s->header("Content-Type"), "text/plain",
                "content-type changed");
        like($s->body, qr/\A\s*bad body\s*\z/, "body");
        unlike($s->body, qr/\r/, "body has no cr");
        like($s->header("X-Content-Filtered-By"),
                qr/PublicInbox::Filter/, "XCFB header added");
}
99
# multipart/alternative: HTML and plain-text, keep the plain-text
#
# Same shape as a typical HTML+text mail: the base64 HTML alternative
# must be dropped and the plain-text alternative kept as-is.
{
        my $html_body = "<html><body>hi</body></html>";
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html; charset=UTF-8',
                                encoding => 'base64',
                        },
                        body => $html_body,
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                        },
                        body => 'hi',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [
                  From => 'a@example.com',
                  Subject => 'blah',
                  'Content-Type' => 'multipart/alternative'
                ],
                parts => $parts,
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # re-parse the serialized message so we test what would be delivered
        my $parsed = Email::MIME->new($email->as_string);
        is($parsed->header("Content-Type"), "text/plain",
                "content-type is text/plain");
        is(scalar $parsed->parts, 1, "HTML part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one bodies");
        is($bodies{"hi"}, 1, "plain text part unchanged");
}
139
# multi-part plain-text-only
#
# Two text/plain parts: both must still be present after filtering and
# no X-Content-Filtered-By header may be added.
{
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => 'text/plain', },
                        body => 'hi',
                ),
                Email::MIME->create(
                        attributes => { content_type => 'text/plain', },
                        body => 'bye',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # re-parse the serialized message so we test what would be delivered
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 2, "still 2 parts");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 2, "two bodies");
        is($bodies{"bye"}, 1, "bye part exists");
        is($bodies{"hi"}, 1, "hi part exists");
        is($parsed->header("X-Content-Filtered-By"), undef,
                "XCFB header unset");
}
171
# multi-part HTML, several HTML parts
#
# Both parts are text/html (one base64, one quoted-printable).  Each is
# expected to survive as a part whose raw body is just the text that was
# inside the markup, and the X-Content-Filtered-By header must be set.
{
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html',
                                encoding => 'base64',
                        },
                        body => '<html><body>b64 body</body></html>',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html',
                                encoding => 'quoted-printable',
                        },
                        body => '<html><body>qp body</body></html>',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # re-parse the serialized message so we test what would be delivered
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 2, "still 2 parts");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 2, "two body parts");
        is($bodies{"b64 body"}, 1, "base64 part converted");
        is($bodies{"qp body"}, 1, "qp part converted");
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
209
# plain-text with image attachments, kill images
#
# The image/jpeg attachment must be removed while the text/plain part is
# kept, and the X-Content-Filtered-By header must be set.
{
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => 'text/plain' },
                        body => 'see image',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'image/jpeg',
                                filename => 'scary.jpg',
                                encoding => 'base64',
                        },
                        body => 'bad',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # re-parse the serialized message so we test what would be delivered
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "image part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one body");
        is($bodies{'see image'}, 1, 'original body exists');
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
244
# all bad
#
# Neither part is acceptable (a JPEG, and a base64 "text/plain" part
# carrying the filename scary.exe).  run() must return 0 to stop delivery
# and the message must be reduced to a single placeholder body.
{
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'image/jpeg',
                                filename => 'scary.jpg',
                                encoding => 'base64',
                        },
                        body => 'bad',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                                filename => 'scary.exe',
                                encoding => 'base64',
                        },
                        body => 'bad',
                ),
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($email), 0,
                "run signaled to stop delivery");

        # re-parse the serialized message so we test what would be delivered
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "bad parts removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one body");
        is($bodies{"all attachments scrubbed by PublicInbox::Filter"}, 1,
           "attachment scrubber left its mark");
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
285
# single-part message with an unrecognized Content-Type
# ('test/pain' is presumably a deliberately bogus type): run() must
# report failure and the serialized message must mention "scrubbed"
{
        my $s = Email::MIME->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'test/pain',
                        Subject => 'this is a subject',
                ],
                body => "hello world\n",
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($s), 0, "run was a failure");
        like($s->as_string, qr/scrubbed/, "scrubbed message");
}
299
# multi-part with application/octet-stream
#
# Two application/octet-stream parts: C source text with no filename, and
# 4096 NUL bytes sent base64-encoded as 'zero.data'.  Exactly one part
# must remain after filtering (which one is not asserted here) and the
# X-Content-Filtered-By header must be set.
{
        my $os = 'application/octet-stream';
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => $os },
                        # interpolating heredoc: "\\n" yields a literal
                        # backslash-n and "\f" a form feed
                        body => <<EOF
#include <stdio.h>
int main(void)
{
        printf("Hello world\\n");
        return 0;
}
\f
/* some folks like ^L */
EOF
                ),
                Email::MIME->create(
                        attributes => {
                                filename => 'zero.data',
                                encoding => 'base64',
                                content_type => $os,
                        },
                        body => ("\0" x 4096),
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        # is() takes ($got, $expected, $name)
        is(PublicInbox::Filter->run($email), 1, "run was a success");

        # re-parse the serialized message so we test what would be delivered
        my $parsed = Email::MIME->new($email->as_string);
        is(scalar $parsed->parts, 1, "only one remaining part");
        like($parsed->header("X-Content-Filtered-By"),
                qr/PublicInbox::Filter/, "XCFB header added");
}

done_testing();