Sergey Matveev's repositories - public-inbox.git/blob - t/filter.t
initial commit
[public-inbox.git] / t / filter.t
1 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
2 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
3 use strict;
4 use warnings;
5 use Test::More;
6 use Email::MIME;
7 use Email::Filter;
8 use PublicInbox::Filter;
9
# Tally the body of one MIME part in a caller-supplied hash.
#
#   $seen      - hash reference; keys are trimmed body strings, values are
#                the number of times that body has been counted
#   $mime_part - any object that provides a body_raw method
#
# Leading and trailing whitespace is stripped from the raw body before it
# is used as the hash key.
sub count_body_parts {
        my ($seen, $mime_part) = @_;
        my $text = $mime_part->body_raw;

        # trim both ends in place
        for ($text) {
                s/\A\s+//;
                s/\s+\z//;
        }

        # start the counter at zero the first time this body is seen
        $seen->{$text} = 0 unless $seen->{$text};
        return $seen->{$text}++;
}
18
# plain-text email is passed through unchanged
#
# Builds a single-part text/plain message, hands it to
# PublicInbox::Filter->run and checks that run returns 1 and that the
# serialized message is the same as what was fed in.
{
        my $s = Email::Simple->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/plain',
                        Subject => 'this is a subject',
                ],
                body => "hello world\n",
        );
        my $f = Email::Filter->new(data => $s->as_string);

        # Test::More::is takes (got, expected, name): the value under test
        # goes first so failure diagnostics label the two sides correctly.
        is(PublicInbox::Filter->run($f->simple), 1, "run was a success");
        is($f->simple->as_string, $s->as_string, "plain email unchanged");
}
34
# convert single-part HTML to plain-text
#
# Feeds a text/html-only message to the filter and checks that run
# returns 1, the <html> markup is gone, Content-Type reads text/plain,
# the body is just "bad body", and an X-Content-Filtered-By header
# mentioning PublicInbox::Filter is present.
{
        my $s = Email::Simple->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'text/html',
                        Subject => 'HTML only badness',
                ],
                body => "<html><body>bad body</body></html>\n",
        );
        my $f = Email::Filter->new(data => $s->as_string);

        # is() expects (got, expected, name)
        is(PublicInbox::Filter->run($f->simple), 1, "run was a success");
        unlike($f->simple->as_string, qr/<html>/, "HTML removed");
        is($f->simple->header("Content-Type"), "text/plain",
                "content-type changed");
        like($f->simple->body, qr/\A\s*bad body\s*\z/, "body");
        like($f->simple->header("X-Content-Filtered-By"),
                qr/PublicInbox::Filter/, "XCFB header added");
}
55
# multipart/alternative: HTML and plain-text, keep the plain-text
#
# The message carries a base64-encoded text/html part and a text/plain
# part with the same content ("hi").  After filtering, the test expects a
# single text/plain part whose trimmed body is "hi".
{
        my $html_body = "<html><body>hi</body></html>";
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html; charset=UTF-8',
                                encoding => 'base64',
                        },
                        body => $html_body,
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                        },
                        body => 'hi',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [
                        From => 'a@example.com',
                        Subject => 'blah',
                        'Content-Type' => 'multipart/alternative'
                ],
                parts => $parts,
        );
        my $f = Email::Filter->new(data => $email->as_string);

        # is() expects (got, expected, name)
        is(PublicInbox::Filter->run($f->simple), 1, "run was a success");

        # re-parse the filtered output so the MIME structure can be inspected
        my $parsed = Email::MIME->new($f->simple->as_string);
        is($parsed->header("Content-Type"), "text/plain",
                "content-type is text/plain");
        is(scalar $parsed->parts, 1, "HTML part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one bodies");
        is($bodies{"hi"}, 1, "plain text part unchanged");
}
96
# multi-part plain-text-only
#
# Two text/plain parts ("hi" and "bye").  The test expects both parts to
# still be present after filtering and no X-Content-Filtered-By header
# on the result.
{
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => 'text/plain', },
                        body => 'hi',
                ),
                Email::MIME->create(
                        attributes => { content_type => 'text/plain', },
                        body => 'bye',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        my $f = Email::Filter->new(data => $email->as_string);

        # is() expects (got, expected, name)
        is(PublicInbox::Filter->run($f->simple), 1, "run was a success");

        # re-parse the filtered output so the MIME structure can be inspected
        my $parsed = Email::MIME->new($f->simple->as_string);
        is(scalar $parsed->parts, 2, "still 2 parts");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 2, "two bodies");
        is($bodies{"bye"}, 1, "bye part exists");
        is($bodies{"hi"}, 1, "hi part exists");
        is($parsed->header("X-Content-Filtered-By"), undef,
                "XCFB header unset");
}
129
# multi-part HTML, several HTML parts
#
# Two text/html parts, one base64-encoded and one quoted-printable.  The
# test expects both parts to survive with their markup stripped down to
# "b64 body" and "qp body", plus an X-Content-Filtered-By header.
{
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html',
                                encoding => 'base64',
                        },
                        body => '<html><body>b64 body</body></html>',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/html',
                                encoding => 'quoted-printable',
                        },
                        body => '<html><body>qp body</body></html>',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        my $f = Email::Filter->new(data => $email->as_string);

        # is() expects (got, expected, name)
        is(PublicInbox::Filter->run($f->simple), 1, "run was a success");

        # re-parse the filtered output so the MIME structure can be inspected
        my $parsed = Email::MIME->new($f->simple->as_string);
        is(scalar $parsed->parts, 2, "still 2 parts");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 2, "two body parts");
        is($bodies{"b64 body"}, 1, "base64 part converted");
        is($bodies{"qp body"}, 1, "qp part converted");
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
168
# plain-text with image attachments, kill images
#
# One text/plain part ("see image") plus a base64 image/jpeg attachment.
# The test expects only the text part to remain after filtering and an
# X-Content-Filtered-By header to be present.
{
        my $parts = [
                Email::MIME->create(
                        attributes => { content_type => 'text/plain' },
                        body => 'see image',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'image/jpeg',
                                filename => 'scary.jpg',
                                encoding => 'base64',
                        },
                        body => 'bad',
                )
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        my $f = Email::Filter->new(data => $email->as_string);

        # is() expects (got, expected, name)
        is(PublicInbox::Filter->run($f->simple), 1, "run was a success");

        # re-parse the filtered output so the MIME structure can be inspected
        my $parsed = Email::MIME->new($f->simple->as_string);
        is(scalar $parsed->parts, 1, "image part removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one body");
        is($bodies{'see image'}, 1, 'original body exists');
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
204
# all bad
#
# Both parts are attachments the test treats as unwanted: a base64
# image/jpeg and a base64 text/plain part with filename scary.exe.  The
# test expects run to return 0 and the message to be reduced to one part
# whose body is the "all attachments scrubbed" notice.
{
        my $parts = [
                Email::MIME->create(
                        attributes => {
                                content_type => 'image/jpeg',
                                filename => 'scary.jpg',
                                encoding => 'base64',
                        },
                        body => 'bad',
                ),
                Email::MIME->create(
                        attributes => {
                                content_type => 'text/plain',
                                filename => 'scary.exe',
                                encoding => 'base64',
                        },
                        body => 'bad',
                ),
        ];
        my $email = Email::MIME->create(
                header_str => [ From => 'a@example.com', Subject => 'blah' ],
                parts => $parts,
        );
        my $f = Email::Filter->new(data => $email->as_string);

        # is() expects (got, expected, name)
        is(PublicInbox::Filter->run($f->simple), 0,
                "run signaled to stop delivery");

        # re-parse the filtered output so the MIME structure can be inspected
        my $parsed = Email::MIME->new($f->simple->as_string);
        is(scalar $parsed->parts, 1, "bad parts removed");
        my %bodies;
        $parsed->walk_parts(sub {
                my ($part) = @_;
                return if $part->subparts; # walk_parts already recurses
                count_body_parts(\%bodies, $part);
        });
        is(scalar keys %bodies, 1, "one body");
        is($bodies{"all attachments scrubbed by PublicInbox::Filter"}, 1,
           "attachment scrubber left its mark");
        like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
             "XCFB header added");
}
246
# single-part message with a Content-Type of 'test/pain'
#
# NOTE(review): 'test/pain' looks like a deliberately bogus type rather
# than a typo of text/plain -- confirm.  The test expects run to return 0
# and the serialized message to contain the word "scrubbed".
{
        my $s = Email::Simple->create(
                header => [
                        From => 'a@example.com',
                        To => 'b@example.com',
                        'Content-Type' => 'test/pain',
                        Subject => 'this is a subject',
                ],
                body => "hello world\n",
        );
        my $f = Email::Filter->new(data => $s->as_string);

        # is() expects (got, expected, name)
        is(PublicInbox::Filter->run($f->simple), 0, "run was a failure");
        like($f->simple->as_string, qr/scrubbed/, "scrubbed message");
}

done_testing();