X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FWatchMaildir.pm;h=5176ef69db9e9d698ceb4a33efadc019739b80f9;hb=bb8aaee10d771f7e1461efd8ec86269215787e52;hp=7d4139a5dcf983461193854495359c45556a97af;hpb=77eafbd653d2efac546f2c330d8cf5e84bef2712;p=public-inbox.git diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index 7d4139a5..5176ef69 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -7,7 +7,7 @@ package PublicInbox::WatchMaildir; use strict; use warnings; use PublicInbox::Eml; -use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::InboxWritable qw(eml_from_path warn_ignore_cb); use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Spamcheck; use PublicInbox::Sigfd; @@ -108,6 +108,7 @@ sub new { return unless $mdre || scalar(keys %imap) || scalar(keys %nntp); bless { + max_batch => 10, # avoid hogging locks for too long spamcheck => $spamcheck, mdmap => \%mdmap, mdre => $mdre, @@ -154,6 +155,7 @@ sub _remove_spam { # path must be marked as (S)een $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return; my $eml = eml_from_path($path) or return; + local $SIG{__WARN__} = warn_ignore_cb(); $self->{config}->each_inbox(\&remove_eml_i, [ $self, $eml, $path ]); } @@ -198,8 +200,8 @@ sub _try_path { } my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; local $SIG{__WARN__} = sub { - $warn_cb->("path: $path\n"); - $warn_cb->(@_); + my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; + $warn_cb->($pfx, "path: $path\n", @_); }; if (!ref($inboxes) && $inboxes eq 'watchspam') { return _remove_spam($self, $path); @@ -287,7 +289,7 @@ sub cfg_intvl ($$$) { sub cfg_bool ($$$) { my ($cfg, $key, $url) = @_; my $orig = $cfg->urlmatch($key, $url) // return; - my $bool = PublicInbox::Config::_git_config_bool($orig); + my $bool = $cfg->git_bool($orig); warn "W: $key=$orig for $url is not boolean\n" unless defined($bool); $bool; } @@ -381,8 +383,8 @@ sub mic_for ($$$) { # mic = Mail::IMAPClient $mic; } -sub imap_import_msg ($$$$) { - my ($self, $url, $uid, $raw) = @_; +sub imap_import_msg ($$$$$) { + my ($self, $url, $uid, $raw, $flags) = @_; # our target audience expects LF-only, save storage $$raw =~ s/\r\n/\n/sg; @@ -393,9 +395,13 @@ sub imap_import_msg ($$$$) { my $x = import_eml($self, $ibx, $eml); } } elsif ($inboxes eq 'watchspam') { - my $eml = PublicInbox::Eml->new($raw); - my $arg = [ $self, $eml, "$url UID:$uid" ]; - $self->{config}->each_inbox(\&remove_eml_i, $arg); + # we don't remove unseen messages + if ($flags =~ /\\Seen\b/) { + local $SIG{__WARN__} = warn_ignore_cb(); + my $eml = PublicInbox::Eml->new($raw); + my $arg = [ $self, $eml, "$url UID:$uid" ]; + $self->{config}->each_inbox(\&remove_eml_i, $arg); + } } else { die "BUG: destination unknown $inboxes"; } @@ -444,9 +450,9 @@ sub imap_fetch_all ($$$) { my ($uids, $batch); my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; local $SIG{__WARN__} = sub { + my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; $batch //= '?'; - $warn_cb->("$url UID:$batch\n"); - $warn_cb->(@_); + $warn_cb->("$pfx$url UID:$batch\n", @_); }; my $err; do { @@ -467,12 +473,18 @@ sub imap_fetch_all ($$$) { $l_uid = $uids->[-1] + 1; # for next search my $last_uid; + my $n = $self->{max_batch}; while (scalar @$uids) { + if (--$n < 0) { + _done_for_now($self); + $itrk->update_last($r_uidval, $last_uid); + $n = $self->{max_batch}; + } my @batch = splice(@$uids, 0, $bs); $batch = join(',', @batch); local $0 = "UID:$batch $mbx $sec"; - my $r = $mic->fetch_hash($batch, $req); + my $r = $mic->fetch_hash($batch, $req, 'FLAGS'); unless ($r) { # network error? $err = "E: $url UID FETCH $batch error: $!"; last; @@ -481,14 +493,15 @@ sub imap_fetch_all ($$$) { # messages get deleted, so holes appear my $per_uid = delete $r->{$uid} // next; my $raw = delete($per_uid->{$key}) // next; - imap_import_msg($self, $url, $uid, \$raw); + my $fl = $per_uid->{FLAGS} // ''; + imap_import_msg($self, $url, $uid, \$raw, $fl); $last_uid = $uid; last if $self->{quit}; } last if $self->{quit}; } _done_for_now($self); - $itrk->update_last($r_uidval, $last_uid) if defined $last_uid; + $itrk->update_last($r_uidval, $last_uid); } until ($err || $self->{quit}); $err; } @@ -877,13 +890,20 @@ sub nntp_fetch_all ($$$) { my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; my ($err, $art); local $SIG{__WARN__} = sub { - $warn_cb->("$url ", $art ? ("ARTICLE $art") : (), "\n", @_); + my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; + $warn_cb->("$pfx$url ", $art ? ("ARTICLE $art") : (), "\n", @_); }; my $inboxes = $self->{nntp}->{$url}; my $last_art; + my $n = $self->{max_batch}; for ($beg..$end) { last if $self->{quit}; $art = $_; + if (--$n < 0) { + _done_for_now($self); + $itrk->update_last(0, $last_art); + $n = $self->{max_batch}; + } my $raw = $nn->article($art); unless (defined($raw)) { my $msg = $nn->message; @@ -911,8 +931,8 @@ sub nntp_fetch_all ($$$) { } $last_art = $art; } - $itrk->update_last(0, $last_art) if defined $last_art; _done_for_now($self); + $itrk->update_last(0, $last_art); $err; } @@ -969,12 +989,11 @@ sub fs_scan_step { local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously # continue existing scan - my $max = 10; my $opendirs = $self->{opendirs}; my @dirnames = keys %$opendirs; foreach my $dir (@dirnames) { my $dh = delete $opendirs->{$dir}; - my $n = $max; + my $n = $self->{max_batch}; while (my $fn = readdir($dh)) { _try_path($self, "$dir/$fn"); last if --$n < 0; @@ -989,7 +1008,7 @@ sub fs_scan_step { warn "failed to open $dir: $!\n"; next; } - my $n = $max; + my $n = $self->{max_batch}; while (my $fn = readdir($dh)) { _try_path($self, "$dir/$fn"); last if --$n < 0;