]> Sergey Matveev's repositories - public-inbox.git/blobdiff - lib/PublicInbox/WatchMaildir.pm
watch: support multiple watch: directives per-inbox
[public-inbox.git] / lib / PublicInbox / WatchMaildir.pm
index e0caaa563b24b0f64f2aa0c6855ac6b1e5dd7197..621d41bd81d40e2166bbd64a6d2f200671323fc2 100644 (file)
@@ -50,7 +50,7 @@ sub new {
        foreach my $pfx (qw(publicinboxwatch publicinboxlearn)) {
                my $k = "$pfx.watchspam";
                defined(my $dirs = $config->{$k}) or next;
-               $dirs = [ $dirs ] if !ref($dirs);
+               $dirs = PublicInbox::Config::_array($dirs);
                for my $dir (@$dirs) {
                        if (is_maildir($dir)) {
                                # skip "new", no MUA has seen it, yet.
@@ -75,21 +75,25 @@ sub new {
                # need to make all inboxes writable for spam removal:
                my $ibx = $_[0] = PublicInbox::InboxWritable->new($_[0]);
 
-               my $watch = $ibx->{watch} or return;
-               if (is_maildir($watch)) {
-                       compile_watchheaders($ibx);
-                       my ($new, $cur) = ("$watch/new", "$watch/cur");
-                       return if is_watchspam($cur, $mdmap{$cur}, $ibx);
-                       push @mdir, $new unless $uniq{$new}++;
-                       push @mdir, $cur unless $uniq{$cur}++;
-                       push @{$mdmap{$new} ||= []}, $ibx;
-                       push @{$mdmap{$cur} ||= []}, $ibx;
-               } elsif (my $url = imap_url($watch)) {
-                       return if is_watchspam($url, $imap{$url}, $ibx);
-                       compile_watchheaders($ibx);
-                       push @{$imap{$url} ||= []}, $ibx;
-               } else {
-                       warn "watch unsupported: $k=$watch\n";
+               my $watches = $ibx->{watch} or return;
+               $watches = PublicInbox::Config::_array($watches);
+               for my $watch (@$watches) {
+                       if (is_maildir($watch)) {
+                               compile_watchheaders($ibx);
+                               my ($new, $cur) = ("$watch/new", "$watch/cur");
+                               my $cur_dst = $mdmap{$cur} //= [];
+                               return if is_watchspam($cur, $cur_dst, $ibx);
+                               push @mdir, $new unless $uniq{$new}++;
+                               push @mdir, $cur unless $uniq{$cur}++;
+                               push @{$mdmap{$new} //= []}, $ibx;
+                               push @$cur_dst, $ibx;
+                       } elsif (my $url = imap_url($watch)) {
+                               return if is_watchspam($url, $imap{$url}, $ibx);
+                               compile_watchheaders($ibx);
+                               push @{$imap{$url} ||= []}, $ibx;
+                       } else {
+                               warn "watch unsupported: $k=$watch\n";
+                       }
                }
        });
        return unless scalar(@mdir) || scalar(keys %imap);
@@ -208,9 +212,11 @@ sub quit {
        }
        if (my $idle_mic = $self->{idle_mic}) {
                eval { $idle_mic->done };
-               warn "IDLE DONE error: $@\n" if $@;
-               eval { $idle_mic->disconnect };
-               warn "IDLE LOGOUT error: $@\n" if $@;
+               if ($@) {
+                       warn "IDLE DONE error: $@\n";
+                       eval { $idle_mic->disconnect };
+                       warn "IDLE LOGOUT error: $@\n" if $@;
+               }
        }
 }
 
@@ -274,6 +280,15 @@ sub imap_common_init ($) {
                $self->{imap_opt}->{$sec}->{poll_intvl} = $to if $to;
                $to = cfg_intvl($cfg, 'imap', 'IdleInterval', $sec, $url);
                $self->{imap_opt}->{$sec}->{idle_intvl} = $to if $to;
+
+               my $key = lc("imap.$sec.fetchBatchSize");
+               my $bs = $cfg->{lc($key)} //
+                       $cfg->urlmatch('imap.fetchBatchSize', $url) // next;
+               if ($bs =~ /\A([0-9]+)\z/) {
+                       $self->{imap_opt}->{$sec}->{batch_size} = $bs;
+               } else {
+                       warn "W: $key=$bs is not an integer\n";
+               }
        }
        $mic_args;
 }
@@ -335,12 +350,12 @@ sub mic_for ($$$) { # mic = Mail::IMAPClient
        $mic;
 }
 
-sub imap_import_msg ($$$$$) {
-       my ($self, $itrk, $r_uidval, $uid, $raw) = @_;
+sub imap_import_msg ($$$$) {
+       my ($self, $url, $uid, $raw) = @_;
        # our target audience expects LF-only, save storage
        $$raw =~ s/\r\n/\n/sg;
 
-       my $inboxes = $self->{imap}->{$itrk->{url}};
+       my $inboxes = $self->{imap}->{$url};
        if (ref($inboxes)) {
                for my $ibx (@$inboxes) {
                        my $eml = PublicInbox::Eml->new($$raw);
@@ -348,12 +363,11 @@ sub imap_import_msg ($$$$$) {
                }
        } elsif ($inboxes eq 'watchspam') {
                my $eml = PublicInbox::Eml->new($raw);
-               my $arg = [ $self, $eml, "$itrk->{url} UID:$uid" ];
+               my $arg = [ $self, $eml, "$url UID:$uid" ];
                $self->{config}->each_inbox(\&remove_eml_i, $arg);
        } else {
                die "BUG: destination unknown $inboxes";
        }
-       $itrk->update_last($r_uidval, $uid);
 }
 
 sub imap_fetch_all ($$$) {
@@ -390,47 +404,60 @@ sub imap_fetch_all ($$$) {
 
        warn "I: $url fetching UID $l_uid:$r_uid\n";
        $mic->Uid(1); # the default, we hope
-       my $uids;
+       my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1;
        my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
+
+       # TODO: FLAGS may be useful for personal use
        my $key = $req;
        $key =~ s/\.PEEK//;
-       my $uid;
+       my ($uids, $batch);
        my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
        local $SIG{__WARN__} = sub {
-               $uid //= -1;
-               $warn_cb->("$url UID:$uid\n");
+               $batch //= '?';
+               $warn_cb->("$url UID:$batch\n");
                $warn_cb->(@_);
        };
        my $err;
        do {
+               # I wish "UID FETCH $START:*" could work, but:
+               # 1) servers do not need to return results in any order
+               # 2) Mail::IMAPClient doesn't offer a streaming API
                $uids = $mic->search("UID $l_uid:*") or
                        return "E: $url UID SEARCH $l_uid:* error: $!";
                return if scalar(@$uids) == 0;
 
                # RFC 3501 doesn't seem to indicate order of UID SEARCH
-               # responses, so sort it ourselves
+               # responses, so sort it ourselves.  Order matters so
+               # IMAPTracker can store the newest UID.
                @$uids = sort { $a <=> $b } @$uids;
 
                # Did we actually get new messages?
                return if $uids->[0] < $l_uid;
 
                $l_uid = $uids->[-1] + 1; # for next search
+               my $last_uid;
 
-               $itrk->{dbh}->begin_work;
-               while (defined(($uid = shift(@$uids)))) {
-                       local $0 = "UID:$uid $mbx $sec";
-                       my $r = $mic->fetch_hash($uid, $req);
+               while (scalar @$uids) {
+                       my @batch = splice(@$uids, 0, $bs);
+                       $batch = join(',', @batch);
+                       local $0 = "UID:$batch $mbx $sec";
+                       my $r = $mic->fetch_hash($batch, $req);
                        unless ($r) { # network error?
-                               $err = "E: $url UID FETCH $uid error: $!";
+                               $err = "E: $url UID FETCH $batch error: $!";
                                last;
                        }
-                       # messages get deleted, so holes appear
-                       defined(my $raw = delete $r->{$uid}->{$key}) or next;
-                       imap_import_msg($self, $itrk, $r_uidval, $uid, \$raw);
+                       for my $uid (@batch) {
+                               # messages get deleted, so holes appear
+                               my $per_uid = delete $r->{$uid} // next;
+                               my $raw = delete($per_uid->{$key}) // next;
+                               imap_import_msg($self, $url, $uid, \$raw);
+                               $last_uid = $uid;
+                               last if $self->{quit};
+                       }
                        last if $self->{quit};
                }
                _done_for_now($self);
-               $itrk->{dbh}->commit;
+               $itrk->update_last($r_uidval, $last_uid) if defined $last_uid;
        } until ($err || $self->{quit});
        $err;
 }