Sergey Matveev's repositories - public-inbox.git/commitdiff
-learn: nuke HTML portions when training as ham
author Eric Wong <e@80x24.org>
Thu, 13 Nov 2014 21:51:42 +0000 (21:51 +0000)
committer Eric Wong <e@80x24.org>
Thu, 13 Nov 2014 21:51:42 +0000 (21:51 +0000)
Sometimes people send HTML email and I forget to fix it up in my
MUA during moderation.  Automatically strip out the HTML portions
instead.

public-inbox-learn
t/mda.t

index 13b75b763cfc9d3280011b29a237065d013de987..db0a1bb35e5ec8115e007c4dcd8cd12fdee82a70 100755 (executable)
@@ -24,9 +24,16 @@ foreach my $h (qw(Cc To)) {
        }
 }
 
-my $in = $mime->as_string;
-$mime->body_set('');
+my ($name, $email, $date);
+
+if ($train eq "ham") {
+       require PublicInbox::MDA;
+       require PublicInbox::Filter;
+       PublicInbox::Filter->run($mime);
+       ($name, $email, $date) = PublicInbox::MDA->author_info($mime);
+}
 
+my $in = $mime->as_string;
 my $err = 0;
 my @output = qw(> /dev/null > /dev/null);
 
@@ -50,16 +57,10 @@ foreach my $recipient (keys %dests) {
                        }
                }
        } else { # $train eq "ham"
-               require PublicInbox::MDA;
-               require PublicInbox::Filter;
-
-               # no checking for errors here, we assume the message has
+               # no checking for spam here, we assume the message has
                # been reviewed by a human at this point:
-               PublicInbox::Filter->run($mime);
                PublicInbox::MDA->set_list_headers($mime, $dst);
 
-               my ($name, $email, $date) =
-                               PublicInbox::MDA->author_info($mime);
                local $ENV{GIT_AUTHOR_NAME} = $name;
                local $ENV{GIT_AUTHOR_EMAIL} = $email;
                local $ENV{GIT_AUTHOR_DATE} = $date;
diff --git a/t/mda.t b/t/mda.t
index fad96e5bf44eb733600f5369c4dfed7130559a3b..53712a565e909a4c9bdf304e59c09a9f6d106eb6 100644 (file)
--- a/t/mda.t
+++ b/t/mda.t
@@ -205,14 +205,55 @@ EOF
        my $in = $simple->as_string;
 
        # now train it
+       # these should be overridden
        local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com';
        local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com';
+
        run([$learn, "ham"], \$in);
        is($?, 0, "learned ham without failure");
        my $msg = `ssoma cat $mid $maindir`;
        like($msg, qr/\Q$mid\E/, "ham message delivered");
        run([$learn, "ham"], \$in);
        is($?, 0, "learned ham idempotently ");
+
+       # ensure trained email is filtered, too
+       my $html_body = "<html><body>hi</body></html>";
+       my $parts = [
+               Email::MIME->create(
+                       attributes => {
+                               content_type => 'text/html; charset=UTF-8',
+                               encoding => 'base64',
+                       },
+                       body => $html_body,
+               ),
+               Email::MIME->create(
+                       attributes => {
+                               content_type => 'text/plain',
+                               encoding => 'quoted-printable',
+                       },
+                       body => 'hi = "bye"',
+               )
+       ];
+       $mid = 'multipart-html-sucks@11';
+       my $mime = Email::MIME->create(
+               header_str => [
+                 From => 'a@example.com',
+                 Subject => 'blah',
+                 Cc => $addr,
+                 'Message-ID' => "<$mid>",
+                 'Content-Type' => 'multipart/alternative',
+               ],
+               parts => $parts,
+       );
+
+       {
+               $in = $mime->as_string;
+               run([$learn, "ham"], \$in);
+               is($?, 0, "learned ham without failure");
+               $msg = `ssoma cat $mid $maindir`;
+               like($msg, qr/<\Q$mid\E>/, "ham message delivered");
+               unlike($msg, qr/<html>/i, '<html> filtered');
+       }
 }
 
 # faildir - emergency destination is maildir