Sergey Matveev's repositories - public-inbox.git/commitdiff
mda: share commit setup code with -learn
author: Eric Wong <e@80x24.org>
Sat, 19 Apr 2014 23:23:10 +0000 (23:23 +0000)
committer: Eric Wong <e@80x24.org>
Sat, 19 Apr 2014 23:27:18 +0000 (23:27 +0000)
We need -learn to do many of the same things as -mda
when we have a false-positive.  We also need -learn to
do HTML filtering in case the training user screws up.

lib/PublicInbox/MDA.pm
public-inbox-learn
public-inbox-mda

diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm
index 22879236e0b76dddda14df0aa605e3cdfedbe80f..bb14ae50cf27a8d28dd9bf87d40686814a2f8650 100644 (file)
@@ -4,8 +4,10 @@ package PublicInbox::MDA;
 use strict;
 use warnings;
 use Email::Address;
+use Encode qw/decode encode/;
 use Date::Parse qw(strptime);
 use constant MAX_SIZE => 1024 * 500; # same as spamc default
+use constant cmd => qw/ssoma-mda -1/;
 
 # drop plus addressing for matching
 sub __drop_plus {
@@ -53,4 +55,32 @@ sub recipient_specified {
        return 0;
 }
 
+# RFC2919 and RFC2369
+sub set_list_headers {
+       my ($class, $simple, $dst) = @_;
+       my $pa = "<$dst->{-primary_address}>";
+       $simple->header_set("List-Id", $pa);
+       $simple->header_set("List-Post", $pa);
+
+       my $url = $dst->{url};
+       if (defined $url) {
+               $simple->header_set("List-Archive", "<$url>");
+               $simple->header_set("List-Help", "<${url}help>");
+       }
+}
+
+# returns a 3-element array: name, email, date
+sub author_info {
+       my ($class, $simple) = @_;
+
+       my $from = decode('MIME-Header', $simple->header('From'));
+       $from = encode('utf8', $from);
+       my @from = Email::Address->parse($from);
+       my $name = $from[0]->name;
+       defined $name or $name = '';
+       my $email = $from[0]->address;
+       defined $email or $email = '';
+       ($name, $email, $simple->header('Date'));
+}
+
 1;
diff --git a/public-inbox-learn b/public-inbox-learn
index d770f0f7b348fe82b23f04e62d14a405b5575d59..2c2bbfb55ffd6037c3c91f086366330001f79811 100755 (executable)
@@ -54,19 +54,22 @@ foreach my $recipient (keys %dests) {
                        }
                }
        } else { # $train eq "ham"
-               my $from = $simple->header("From");
-               my @from = Email::Address->parse($from);
-               my $name = $from[0]->name;
-               defined $name or $name = "";
-               my $email = $from[0]->address;
-               defined $email or $email = "";
+               require PublicInbox::MDA;
+               require PublicInbox::Filter;
+
+               # no checking for errors here, we assume the message has
+               # been reviewed by a human at this point:
+               PublicInbox::Filter->run($simple);
+
+               my ($name, $email, $date) =
+                               PublicInbox::MDA->author_info($simple);
                local $ENV{GIT_AUTHOR_NAME} = $name;
                local $ENV{GIT_AUTHOR_EMAIL} = $email;
-               local $ENV{GIT_AUTHOR_DATE} = $simple->header("Date");
+               local $ENV{GIT_AUTHOR_DATE} = $date;
 
                # Ham messages are trained when they're marked into
-               # a SEEN state, so this is idempotent
-               run([qw(ssoma-mda -1), $git_dir], \$in, \$out, \$err);
+               # a SEEN state, so this is idempotent:
+               run([PublicInbox::MDA->cmd, $git_dir], \$in, \$out, \$err);
                if ($err !~ /CONFLICT/) {
                        $err = 1;
                }
diff --git a/public-inbox-mda b/public-inbox-mda
index dd3dac80291c14a962c92673b2da7c92cec2498a..504fefd7ee1a042351ea2adb98edaad35f51dc2a 100755 (executable)
@@ -11,7 +11,6 @@ use Encode qw/decode encode/;
 use Encode::MIME::Header;
 use File::Path::Expand qw/expand_filename/;
 use IPC::Run qw(run);
-use constant MDA => 'ssoma-mda';
 use PublicInbox::MDA;
 use PublicInbox::Filter;
 use PublicInbox::Config;
@@ -44,23 +43,18 @@ if (PublicInbox::MDA->precheck($filter, $recipient) &&
                # run spamc again on the HTML-free message
                if (do_spamc($simple, \$filtered)) {
                        $simple = Email::Simple->new($filtered);
-                       set_list_headers($simple, $dst);
+                       PublicInbox::MDA->set_list_headers($simple, $dst);
                        $filter->simple($simple);
 
-                       my $from = decode('MIME-Header', $filter->from);
-                       $from = encode("utf8", $from);
-                       my @from = Email::Address->parse($from);
-                       my $name = $from[0]->name;
-                       defined $name or $name = "";
-                       my $email = $from[0]->address;
-                       defined $email or $email = "";
+                       my ($name, $email, $date) =
+                                       PublicInbox::MDA->author_info($simple);
                        local $ENV{GIT_AUTHOR_NAME} = $name;
                        local $ENV{GIT_AUTHOR_EMAIL} = $email;
-                       local $ENV{GIT_AUTHOR_DATE} = $simple->header("Date");
+                       local $ENV{GIT_AUTHOR_DATE} = $date;
                        local $ENV{GIT_COMMITTER_EMAIL} = $recipient;
                        local $ENV{GIT_COMMITTER_NAME} = $dst->{listname};
 
-                       $filter->pipe(MDA, '-1', $main_repo);
+                       $filter->pipe(PublicInbox::MDA->cmd, $main_repo);
                }
        }
 }
@@ -78,17 +72,3 @@ sub do_spamc {
 
        return ($@ || $? || !defined($$out) || length($$out) == 0) ? 0 : 1;
 }
-
-# RFC2919 and RFC2369
-sub set_list_headers {
-       my ($simple, $dst) = @_;
-       my $pa = "<$dst->{-primary_address}>";
-       $simple->header_set("List-Id", $pa);
-       $simple->header_set("List-Post", $pa);
-
-       my $url = $dst->{url};
-       if (defined $url) {
-               $simple->header_set("List-Archive", "<$url>");
-               $simple->header_set("List-Help", "<${url}help>");
-       }
-}