From 94ae705673cb03045a109041eec9a6704b8a735b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 31 Aug 2020 04:41:34 +0000 Subject: [PATCH] watch: avoid unnecessary spawning on spam removals This should further mitigate lock contention problems when -watch is configured to watch on a Maildir for spam while performing a large NNTP import. There is now a small risk a message won't get removed if it's in the current (uncommitted) fast-import batch, but that is unlikely given the batch size is now only 10 messages. If that small window is hit, flipping the \Seen flag (e.g. marking it unread, and then read again) will trigger another removal attempt via IMAP or Maildir. --- lib/PublicInbox/Import.pm | 3 +++ lib/PublicInbox/V2Writable.pm | 3 +++ lib/PublicInbox/Watch.pm | 31 +++++++++++++++++++++++++------ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 700b4026..ee5ca2ea 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -461,6 +461,9 @@ sub init_bare { } } +# true if locked and active +sub active { !!$_[0]->{out} } + sub done { my ($self) = @_; my $w = delete $self->{out} or return; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index f2288904..553dd839 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -655,6 +655,9 @@ sub checkpoint ($;$) { # public sub barrier { checkpoint($_[0], 1) }; +# true if locked and active +sub active { !!$_[0]->{im} } + # public sub done { my ($self) = @_; diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 5f786139..0bb92d0a 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -134,15 +134,34 @@ sub _done_for_now { sub remove_eml_i { # each_inbox callback my ($ibx, $arg) = @_; my ($self, $eml, $loc) = @$arg; + eval { - my $im = _importer_for($self, $ibx); - $im->remove($eml, 'spam'); - if (my $scrub = $ibx->filter($im)) { - my $scrubbed = 
$scrub->scrub($eml, 1); - if ($scrubbed && $scrubbed != REJECT) { - $im->remove($scrubbed, 'spam'); + # try to avoid taking a lock or unnecessary spawning + my $im = $self->{importers}->{"$ibx"}; + my $scrubbed; + if ((!$im || !$im->active) && $ibx->over) { + if (content_exists($ibx, $eml)) { + # continue + } elsif (my $scrub = $ibx->filter($im)) { + $scrubbed = $scrub->scrub($eml, 1); + if ($scrubbed && $scrubbed != REJECT && + !content_exists($ibx, $scrubbed)) { + return; + } + } else { + return; } } + + $im //= _importer_for($self, $ibx); # may spawn fast-import + $im->remove($eml, 'spam'); + $scrubbed //= do { + my $scrub = $ibx->filter($im); + $scrub ? $scrub->scrub($eml, 1) : undef; + }; + if ($scrubbed && $scrubbed != REJECT) { + $im->remove($scrubbed, 'spam'); + } }; if ($@) { warn "error removing spam at: $loc from $ibx->{name}: $@\n"; -- 2.44.0