From: Eric Wong
Date: Sat, 19 Apr 2014 23:23:10 +0000 (+0000)
Subject: mda: share commit setup code with -learn
X-Git-Tag: v1.0.0~1230
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=a8d9e2f1853032016db6ff177979873b3bdadd85;p=public-inbox.git

mda: share commit setup code with -learn

We need -learn to do many of the same things as -mda when we have
a false-positive. We also need -learn to do HTML filtering in case
the training user screws up.
---

diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm
index 22879236..bb14ae50 100644
--- a/lib/PublicInbox/MDA.pm
+++ b/lib/PublicInbox/MDA.pm
@@ -4,8 +4,10 @@ package PublicInbox::MDA;
 use strict;
 use warnings;
 use Email::Address;
+use Encode qw/decode encode/;
 use Date::Parse qw(strptime);
 use constant MAX_SIZE => 1024 * 500; # same as spamc default
+use constant cmd => qw/ssoma-mda -1/;
 
 # drop plus addressing for matching
 sub __drop_plus {
@@ -53,4 +55,32 @@ sub recipient_specified {
 	return 0;
 }
 
+# RFC2919 and RFC2369
+sub set_list_headers {
+	my ($class, $simple, $dst) = @_;
+	my $pa = "<$dst->{-primary_address}>";
+	$simple->header_set("List-Id", $pa);
+	$simple->header_set("List-Post", $pa);
+
+	my $url = $dst->{url};
+	if (defined $url) {
+		$simple->header_set("List-Archive", "<$url>");
+		$simple->header_set("List-Help", "<${url}help>");
+	}
+}
+
+# returns a 3-element array: name, email, date
+sub author_info {
+	my ($class, $simple) = @_;
+
+	my $from = decode('MIME-Header', $simple->header('From'));
+	$from = encode('utf8', $from);
+	my @from = Email::Address->parse($from);
+	my $name = $from[0]->name;
+	defined $name or $name = '';
+	my $email = $from[0]->address;
+	defined $email or $email = '';
+	($name, $email, $simple->header('Date'));
+}
+
 1;
diff --git a/public-inbox-learn b/public-inbox-learn
index d770f0f7..2c2bbfb5 100755
--- a/public-inbox-learn
+++ b/public-inbox-learn
@@ -54,19 +54,22 @@ foreach my $recipient (keys %dests) {
 			}
 		}
 	} else { # $train eq "ham"
-		my $from = $simple->header("From");
-		my @from = Email::Address->parse($from);
-		my $name = $from[0]->name;
-		defined $name or $name = "";
-		my $email = $from[0]->address;
-		defined $email or $email = "";
+		require PublicInbox::MDA;
+		require PublicInbox::Filter;
+
+		# no checking for errors here, we assume the message has
+		# been reviewed by a human at this point:
+		PublicInbox::Filter->run($simple);
+
+		my ($name, $email, $date) =
+				PublicInbox::MDA->author_info($simple);
 		local $ENV{GIT_AUTHOR_NAME} = $name;
 		local $ENV{GIT_AUTHOR_EMAIL} = $email;
-		local $ENV{GIT_AUTHOR_DATE} = $simple->header("Date");
+		local $ENV{GIT_AUTHOR_DATE} = $date;
 
 		# Ham messages are trained when they're marked into
-		# a SEEN state, so this is idempotent
-		run([qw(ssoma-mda -1), $git_dir], \$in, \$out, \$err);
+		# a SEEN state, so this is idempotent:
+		run([PublicInbox::MDA->cmd, $git_dir], \$in, \$out, \$err);
 		if ($err !~ /CONFLICT/) {
 			$err = 1;
 		}
diff --git a/public-inbox-mda b/public-inbox-mda
index dd3dac80..504fefd7 100755
--- a/public-inbox-mda
+++ b/public-inbox-mda
@@ -11,7 +11,6 @@ use Encode qw/decode encode/;
 use Encode::MIME::Header;
 use File::Path::Expand qw/expand_filename/;
 use IPC::Run qw(run);
-use constant MDA => 'ssoma-mda';
 use PublicInbox::MDA;
 use PublicInbox::Filter;
 use PublicInbox::Config;
@@ -44,23 +43,18 @@ if (PublicInbox::MDA->precheck($filter, $recipient) &&
 		# run spamc again on the HTML-free message
 		if (do_spamc($simple, \$filtered)) {
 			$simple = Email::Simple->new($filtered);
-			set_list_headers($simple, $dst);
+			PublicInbox::MDA->set_list_headers($simple, $dst);
 			$filter->simple($simple);
 
-			my $from = decode('MIME-Header', $filter->from);
-			$from = encode("utf8", $from);
-			my @from = Email::Address->parse($from);
-			my $name = $from[0]->name;
-			defined $name or $name = "";
-			my $email = $from[0]->address;
-			defined $email or $email = "";
+			my ($name, $email, $date) =
+					PublicInbox::MDA->author_info($simple);
 			local $ENV{GIT_AUTHOR_NAME} = $name;
 			local $ENV{GIT_AUTHOR_EMAIL} = $email;
-			local $ENV{GIT_AUTHOR_DATE} = $simple->header("Date");
+			local $ENV{GIT_AUTHOR_DATE} = $date;
 			local $ENV{GIT_COMMITTER_EMAIL} = $recipient;
 			local $ENV{GIT_COMMITTER_NAME} = $dst->{listname};
 
-			$filter->pipe(MDA, '-1', $main_repo);
+			$filter->pipe(PublicInbox::MDA->cmd, $main_repo);
 		}
 	}
 }
@@ -78,17 +72,3 @@ sub do_spamc {
 
 	return ($@ || $? || !defined($$out) || length($$out) == 0) ? 0 : 1;
 }
-
-# RFC2919 and RFC2369
-sub set_list_headers {
-	my ($simple, $dst) = @_;
-	my $pa = "<$dst->{-primary_address}>";
-	$simple->header_set("List-Id", $pa);
-	$simple->header_set("List-Post", $pa);
-
-	my $url = $dst->{url};
-	if (defined $url) {
-		$simple->header_set("List-Archive", "<$url>");
-		$simple->header_set("List-Help", "<${url}help>");
-	}
-}