use strict;
use warnings;
use PublicInbox::Config;
+use PublicInbox::Git;
+use PublicInbox::Import;
use Email::MIME;
-use Email::Address;
-use IPC::Run qw/run/;
+use Email::MIME::ContentType;
+$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
+use PublicInbox::Address;
+use PublicInbox::Spawn qw(spawn);
my $train = shift or die "usage: $usage\n";
if ($train !~ /\A(?:ham|spam)\z/) {
die "`$train' not recognized.\nusage: $usage\n";
}
my $pi_config = PublicInbox::Config->new;
+# Error from the spamc training step (empty string on success).  It is
+# only reported at the very end of the script, so a spamc failure does
+# not prevent the message from being processed for the inboxes.
+my $err;
my $mime = Email::MIME->new(eval {
local $/;
my $data = scalar <STDIN>;
$data =~ s/\AFrom [^\r\n]*\r?\n//s;
+ # Feed the raw message to "spamc -L ham|spam" on its stdin, with
+ # the child's stdout and stderr sent to /dev/null.  Every failure
+ # in here is caught and stored in $err instead of being fatal.
+ eval {
+ my @cmd = (qw(spamc -L), $train);
+ my ($r, $w);
+ pipe($r, $w) or die "pipe failed: $!";
+ open my $null, '>', '/dev/null' or
+ die "failed to open /dev/null: $!";
+ my $nullfd = fileno($null);
+ my %rdr = (0 => fileno($r), 1 => $nullfd, 2 => $nullfd);
+ my $pid = spawn(\@cmd, undef, \%rdr);
+ # Ignore SIGPIPE only now, after the child exists, so spamc does
+ # not inherit the ignored disposition.  If spamc goes away before
+ # reading everything, the write below fails with EPIPE and dies
+ # into $err instead of the signal killing this whole script.
+ local $SIG{PIPE} = 'IGNORE';
+ close $null;
+ eval {
+ close $r or die "close \$r failed: $!";
+ print $w $data or die "print \$w failed: $!";
+ close $w or die "close \$w failed: $!";
+ };
+ my $io_err = $@;
+ # On a write-side failure make sure spamc sees EOF, otherwise the
+ # waitpid below could block forever on a child still reading.
+ close $w if $io_err;
+ # Always reap the child, even when feeding it failed.
+ waitpid($pid, 0);
+ die $io_err if $io_err;
+ die "spamc failed with: $?\n" if $?;
+ };
+ $err = $@;
$data
});
# get all recipients
my %dests;
foreach my $h (qw(Cc To)) {
- foreach my $recipient (Email::Address->parse($mime->header($h))) {
- $dests{lc($recipient->address)} = 1;
+ my $val = $mime->header($h) or next; # skip a header that is absent or has a false value
+ foreach my $email (PublicInbox::Address::emails($val)) { # each address extracted from this header
+ $dests{lc($email)} = 1; # lowercased key, so an address differing only in case is stored once
}
}
-my ($name, $email, $date);
-
-if ($train eq "ham") {
- require PublicInbox::MDA;
- require PublicInbox::Filter;
- PublicInbox::Filter->run($mime);
- ($name, $email, $date) = PublicInbox::MDA->author_info($mime);
-}
-
-my $in = $mime->as_string;
-my $err = 0;
-my @output = qw(> /dev/null > /dev/null);
+require PublicInbox::MDA if $train eq "ham"; # loaded lazily: only the ham path calls PublicInbox::MDA (set_list_headers)
# n.b. message may be cross-posted to multiple public-inboxes
foreach my $recipient (keys %dests) {
my $dst = $pi_config->lookup($recipient) or next;
my $git_dir = $dst->{mainrepo} or next;
- my ($out, $err) = ("", "");
-
+ my $git = PublicInbox::Git->new($git_dir); # git handle for this inbox's {mainrepo} directory
# We do not touch GIT_COMMITTER_* env here so we can track
# who trained the message.
- # We will not touch GIT_AUTHOR_* when learning spam messages, either
+ my $name = $ENV{GIT_COMMITTER_NAME} || $dst->{inbox}; # environment value when set and true, else the {inbox} entry of the looked-up config
+ my $email = $ENV{GIT_COMMITTER_EMAIL} || $recipient; # environment value when set and true, else the matched recipient address
+ my $im = PublicInbox::Import->new($git, $name, $email); # NOTE(review): how Import uses $name/$email is not visible here -- confirm
+
if ($train eq "spam") {
# This needs to be idempotent, as my inotify trainer
# may train for each cross-posted message, and this
# script already learns for every list in
# ~/.public-inbox/config
- if (!run(["ssoma-rm", $git_dir], \$in, \$out, \$err)) {
- if ($err !~ /^git cat-file .+ failed: 32768$/) {
- $err = 1;
- }
- }
+ $im->remove($mime); # takes over from the removed ssoma-rm invocation
} else { # $train eq "ham"
# no checking for spam here, we assume the message has
# been reviewed by a human at this point:
PublicInbox::MDA->set_list_headers($mime, $dst);
- my $s = $mime->as_string;
-
- local $ENV{GIT_AUTHOR_NAME} = $name;
- local $ENV{GIT_AUTHOR_EMAIL} = $email;
- local $ENV{GIT_AUTHOR_DATE} = $date;
# Ham messages are trained when they're marked into
# a SEEN state, so this is idempotent:
- run([PublicInbox::MDA->cmd, $git_dir], \$s, \$out, \$err);
- if ($err !~ /CONFLICT/) {
- $err = 1;
- }
- }
- if (!run([qw(spamc -L), $train], \$in, @output)) {
- $err = 1;
+ $im->add($mime); # takes over from running the PublicInbox::MDA->cmd command
}
-
- $err or eval {
+ $im->done; # called once per inbox after remove/add; presumably finalizes the import -- confirm
+ eval { # now runs unconditionally; $@ is never checked, so load or indexing errors are silently dropped
require PublicInbox::SearchIdx;
PublicInbox::SearchIdx->new($git_dir, 2)->index_sync;
};
}
-exit $err;
+if ($err) { # $err is the error captured earlier while running spamc, if any
+ warn $err;
+ exit 1; # reached only after every matching inbox has been processed
+}