X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWatchMaildir.pm;h=4ae400f7905fdb203fd3fa69507634e243b9c5c5;hb=4f623a133e5531032e378a3d5dd9aec9243450ae;hp=7547f6e4761f20008486d2642fd458a186917885;hpb=d75cbb9b69a780b6bbc37e243ada35dfd0c47552;p=public-inbox.git diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index 7547f6e4..4ae400f7 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -7,7 +7,7 @@ package PublicInbox::WatchMaildir; use strict; use warnings; use PublicInbox::Eml; -use PublicInbox::InboxWritable; +use PublicInbox::InboxWritable qw(eml_from_path warn_ignore_cb); use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Spamcheck; use PublicInbox::Sigfd; @@ -15,7 +15,6 @@ use PublicInbox::DS qw(now); use PublicInbox::MID qw(mids); use PublicInbox::ContentHash qw(content_hash); use POSIX qw(_exit); -*mime_from_path = \&PublicInbox::InboxWritable::mime_from_path; sub compile_watchheaders ($) { my ($ibx) = @_; @@ -124,8 +123,10 @@ sub new { sub _done_for_now { my ($self) = @_; local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously - for (values %{$self->{importers}}) { - $_->done if $_; # $_ may be undef during cleanup + for my $im (values %{$self->{importers}}) { + next if !$im; # $im may be undef during cleanup + eval { $im->done }; + warn "$im->{ibx}->{name} ->done: $@\n" if $@; } } @@ -137,43 +138,51 @@ sub remove_eml_i { # each_inbox callback $im->remove($eml, 'spam'); if (my $scrub = $ibx->filter($im)) { my $scrubbed = $scrub->scrub($eml, 1); - $scrubbed or return; - $scrubbed == REJECT() and return; - $im->remove($scrubbed, 'spam'); + if ($scrubbed && $scrubbed != REJECT) { + $im->remove($scrubbed, 'spam'); + } } }; - warn "error removing spam at: $loc from $ibx->{name}: $@\n" if $@; + if ($@) { + warn "error removing spam at: $loc from $ibx->{name}: $@\n"; + _done_for_now($self); + } } sub _remove_spam { my ($self, $path) = @_; # path must be marked as (S)een $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return; - my $eml = mime_from_path($path) or return; + my $eml = eml_from_path($path) or return; + local $SIG{__WARN__} = warn_ignore_cb(); $self->{config}->each_inbox(\&remove_eml_i, [ $self, $eml, $path ]); } sub import_eml ($$$) { my ($self, $ibx, $eml) = @_; - my $im = _importer_for($self, $ibx); # any header match means it's eligible for the inbox: if (my $watch_hdrs = $ibx->{-watchheaders}) { my $ok; - my $hdr = $eml->header_obj; for my $wh (@$watch_hdrs) { - my @v = $hdr->header_raw($wh->[0]); + my @v = $eml->header_raw($wh->[0]); $ok = grep(/$wh->[1]/, @v) and last; } return unless $ok; } - - if (my $scrub = $ibx->filter($im)) { - my $ret = $scrub->scrub($eml) or return; - $ret == REJECT() and return; - $eml = $ret; + eval { + my $im = _importer_for($self, $ibx); + if (my $scrub = $ibx->filter($im)) { + my $scrubbed = $scrub->scrub($eml) or return; + $scrubbed == REJECT and return; + $eml = $scrubbed; + } + $im->add($eml, $self->{spamcheck}); + }; + if ($@) { + warn "$ibx->{name} add failed: $@\n"; + _done_for_now($self); } - $im->add($eml, $self->{spamcheck}); } sub _try_path { @@ -190,14 +199,14 @@ sub _try_path { } my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; local $SIG{__WARN__} = sub { - $warn_cb->("path: $path\n"); - $warn_cb->(@_); + my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; + $warn_cb->($pfx, "path: $path\n", @_); }; if (!ref($inboxes) && $inboxes eq 'watchspam') { return _remove_spam($self, $path); } foreach my $ibx (@$inboxes) { - my $eml = mime_from_path($path) or next; + my $eml = eml_from_path($path) or next; import_eml($self, $ibx, $eml); } } @@ -279,7 +288,7 @@ sub cfg_intvl ($$$) { sub cfg_bool ($$$) { my ($cfg, $key, $url) = @_; my $orig = $cfg->urlmatch($key, $url) // return; - my $bool = PublicInbox::Config::_git_config_bool($orig); + my $bool = $cfg->git_bool($orig); warn "W: $key=$orig for $url is not boolean\n" unless defined($bool); $bool; } @@ -373,8 +382,8 @@ sub mic_for ($$$) { # mic = Mail::IMAPClient $mic; } -sub imap_import_msg ($$$$) { - my ($self, $url, $uid, $raw) = @_; +sub imap_import_msg ($$$$$) { + my ($self, $url, $uid, $raw, $flags) = @_; # our target audience expects LF-only, save storage $$raw =~ s/\r\n/\n/sg; @@ -385,9 +394,13 @@ sub imap_import_msg ($$$$) { my $x = import_eml($self, $ibx, $eml); } } elsif ($inboxes eq 'watchspam') { - my $eml = PublicInbox::Eml->new($raw); - my $arg = [ $self, $eml, "$url UID:$uid" ]; - $self->{config}->each_inbox(\&remove_eml_i, $arg); + # we don't remove unseen messages + if ($flags =~ /\\Seen\b/) { + local $SIG{__WARN__} = warn_ignore_cb(); + my $eml = PublicInbox::Eml->new($raw); + my $arg = [ $self, $eml, "$url UID:$uid" ]; + $self->{config}->each_inbox(\&remove_eml_i, $arg); + } } else { die "BUG: destination unknown $inboxes"; } @@ -436,9 +449,9 @@ sub imap_fetch_all ($$$) { my ($uids, $batch); my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; local $SIG{__WARN__} = sub { + my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; $batch //= '?'; - $warn_cb->("$url UID:$batch\n"); - $warn_cb->(@_); + $warn_cb->("$pfx$url UID:$batch\n", @_); }; my $err; do { @@ -464,7 +477,7 @@ sub imap_fetch_all ($$$) { my @batch = splice(@$uids, 0, $bs); $batch = join(',', @batch); local $0 = "UID:$batch $mbx $sec"; - my $r = $mic->fetch_hash($batch, $req); + my $r = $mic->fetch_hash($batch, $req, 'FLAGS'); unless ($r) { # network error? $err = "E: $url UID FETCH $batch error: $!"; last; @@ -473,7 +486,8 @@ sub imap_fetch_all ($$$) { # messages get deleted, so holes appear my $per_uid = delete $r->{$uid} // next; my $raw = delete($per_uid->{$key}) // next; - imap_import_msg($self, $url, $uid, \$raw); + my $fl = $per_uid->{FLAGS} // ''; + imap_import_msg($self, $url, $uid, \$raw, $fl); $last_uid = $uid; last if $self->{quit}; } @@ -869,7 +883,8 @@ sub nntp_fetch_all ($$$) { my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; my ($err, $art); local $SIG{__WARN__} = sub { - $warn_cb->("$url ", $art ? ("ARTICLE $art") : (), "\n", @_); + my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; + $warn_cb->("$pfx$url ", $art ? ("ARTICLE $art") : (), "\n", @_); }; my $inboxes = $self->{nntp}->{$url}; my $last_art;