# returns undef on duplicate
sub add {
- my ($self, $mime) = @_; # mime = Email::MIME
+ my ($self, $mime, $check_cb) = @_; # mime = Email::MIME
my $from = $mime->header('From');
my ($email) = ($from =~ /([^<\s]+\@[^>\s]+)/g);
# kill potentially confusing/misleading headers
$mime->header_set($_) for qw(bytes lines content-length status);
+ if ($check_cb) {
+ $mime = $check_cb->($mime) or return;
+ }
+
$mime = $mime->as_string;
my $blob = $self->{mark}++;
print $w "blob\nmark :$blob\ndata ", length($mime), "\n" or wfail;
sub new {
my ($class, $config) = @_;
- my (%mdmap, @mdir);
+ my (%mdmap, @mdir, $spamc);
+
+ # XXX is "publicinboxlearn" really a good namespace for this?
my $k = 'publicinboxlearn.watchspam';
if (my $spamdir = $config->{$k}) {
if ($spamdir =~ s/\Amaildir://) {
warn "unsupported $k=$spamdir\n";
}
}
+
+ # Optional spam check: "spamc" is shorthand for the bundled checker
+ # class; any other value must itself be a Perl package name.
+ $k = 'publicinboxwatch.spamcheck';
+ my $spamcheck = $config->{$k};
+ if ($spamcheck) {
+ if ($spamcheck eq 'spamc') {
+ $spamcheck = 'PublicInbox::Spamcheck::Spamc';
+ }
+ # The value is interpolated into a string eval below, so only a
+ # well-formed Package::Name may get that far.
+ if ($spamcheck =~ /\A[A-Za-z_]\w*(?:::\w+)+\z/) {
+ # "; 1" makes a successful load distinguishable from failure;
+ # die with the real reason instead of failing later on ->new
+ eval "require $spamcheck; 1" or
+ die "failed to load $k=$spamcheck: $@";
+ $spamcheck = _spamcheck_cb($spamcheck->new);
+ } else {
+ warn "unsupported $k=$spamcheck\n";
+ $spamcheck = undef;
+ }
+ }
foreach $k (keys %$config) {
$k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next;
my $name = $1;
my $mdre = join('|', map { quotemeta($_) } @mdir);
$mdre = qr!\A($mdre)/!;
bless {
+ spamcheck => $spamcheck,
mdmap => \%mdmap,
mdir => \@mdir,
mdre => $mdre,
}
_force_mid($mime);
- $im->add($mime);
+ $im->add($mime, $self->{spamcheck});
}
# As written here, the body is a single expression that evaluates to undef.
sub watch { undef }
+# Wrap a spam checker object in a callback.  The callback takes one
+# message, hands it to $checker->spamcheck together with a reference to
+# an empty output buffer, and returns a new Email::MIME built from that
+# buffer when spamcheck returns true, or undef when it returns false.
+sub _spamcheck_cb {
+ my ($checker) = @_;
+ return sub {
+ my ($msg) = @_;
+ my $out = '';
+ return $checker->spamcheck($msg, \$out)
+ ? Email::MIME->new(\$out)
+ : undef;
+ };
+}
+
1;
$mime->header_set('Message-ID', '<b@example.com>');
$mime->header_set('Subject', 'msg2');
-like($im->add($mime), qr/\A:\d+\z/, 'added 2nd message');
+like($im->add($mime, sub { $mime }), qr/\A:\d+\z/, 'added 2nd message');
$im->done;
@revs = $git->qx(qw(rev-list HEAD));
is(scalar @revs, 2, '2 revisions exist');
is($msg->header('Message-ID'), '<a@example.com>', 'Message-ID matches');
isnt($msg->header('Subject'), $mime->header('Subject'), 'subject mismatch');
+$mime->header_set('Message-Id', '<failcheck@example.com>');
+is($im->add($mime, sub { undef }), undef, 'check callback fails');
+is($im->remove($mime), undef, 'message not added, so not removed');
+
$im->done;
done_testing();
use Test::More;
use File::Temp qw/tempdir/;
use Email::MIME;
+use Cwd;
use PublicInbox::Config;
my @mods = qw(Filesys::Notify::Simple);
foreach my $mod (@mods) {
is(scalar @list, 4, 'four revisions in rev-list');
}
+{
+ # Put t/fail-bin first on PATH so its spamc is the one found; the
+ # assertions below expect the scanned message to be kept out of the tree.
+ my $fail_bin = getcwd()."/t/fail-bin";
+ ok(-x "$fail_bin/spamc", "mock spamc exists");
+ my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock
+ local $ENV{PATH} = $fail_path;
+ PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+ $config->{'publicinboxwatch.spamcheck'} = 'spamc';
+ PublicInbox::WatchMaildir->new($config)->scan;
+ @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
+ is(scalar @list, 0, 'tree has no files spamc checked');
+ # exactly one file is expected under new/; remove it for the next block
+ is(unlink(glob("$maildir/new/*")), 1,
+ 'unlinked the one message remaining in new/');
+}
+
+{
+ # Put t/main-bin first on PATH so its spamc is the one found; the
+ # assertions below expect the scanned message to land in the tree.
+ my $main_bin = getcwd()."/t/main-bin";
+ ok(-x "$main_bin/spamc", "mock spamc exists");
+ my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock
+ local $ENV{PATH} = $main_path;
+ PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+ $config->{'publicinboxwatch.spamcheck'} = 'spamc';
+ # record the starting state so the count after the scan is meaningful
+ @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
+ is(scalar @list, 0, 'tree has no files before ham is scanned');
+ PublicInbox::WatchMaildir->new($config)->scan;
+ @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
+ is(scalar @list, 1, 'tree has one file after spamc checked');
+
+ # XXX: workaround some weird caching/memoization in cat-file,
+ # shouldn't be an issue in real-world use, though...
+ $git = PublicInbox::Git->new($git_dir);
+
+ my $mref = $git->cat_file('refs/heads/master:'.$list[0]);
+ like($$mref, qr/something\n\z/s, 'message scrubbed on import');
+}
+
done_testing;