-# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Extends read-only Inbox for writing
use PublicInbox::Import;
use PublicInbox::Filter::Base qw(REJECT);
use Errno qw(ENOENT);
-our @EXPORT_OK = qw(eml_from_path warn_ignore_cb);
+our @EXPORT_OK = qw(eml_from_path);
+use Fcntl qw(O_RDONLY O_NONBLOCK);
use constant {
PERM_UMASK => 0,
undef;
}
-sub is_maildir_basename ($) {
- my ($bn) = @_;
- return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/;
- if ($bn =~ /:2,([A-Z]+)\z/i) {
- my $flags = $1;
- return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
- }
- 1;
-}
-
-sub is_maildir_path ($) {
- my ($path) = @_;
- my @p = split(m!/+!, $path);
- (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
-}
-
+# Slurp the file at $path and wrap its contents in a PublicInbox::Eml.
+# Returns nothing when the opened handle is not a regular file or when
+# the slurped contents are false (e.g. an empty file).
sub eml_from_path ($) {
my ($path) = @_;
- if (open my $fh, '<', $path) {
+ # NOTE(review): O_NONBLOCK is presumably here so that opening a FIFO
+ # cannot block before the -f check below rejects it -- confirm.
+ if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) {
+ return unless -f $fh; # no FIFOs or directories
my $str = do { local $/; <$fh> } or return;
PublicInbox::Eml->new(\$str);
} else { # ENOENT is common with Maildir
}
}
+# Per-file callback used by import_maildir: add one Maildir message to $im.
+# $fn is the path handed to eml_from_path, $im is the importer, and $self
+# is optional; when given, its filter scrubs the message before it is added.
+# Messages flagged as Draft/Trashed, unreadable files, and messages the
+# filter drops or REJECTs are silently skipped.
+sub _each_maildir_fn {
+ my ($fn, $im, $self) = @_;
+ my ($flags) = ($fn =~ /:2,([A-Za-z]*)\z/);
+ # no Drafts or Trash for public
+ return if defined($flags) && $flags =~ /[DT]/;
+ my $eml = eml_from_path($fn) or return;
+ my $filter = $self ? $self->filter($im) : undef;
+ if ($filter) {
+ my $scrubbed = $filter->scrub($eml) or return;
+ return if $scrubbed == REJECT();
+ $eml = $scrubbed;
+ }
+ $im->add($eml);
+}
+
+# Import every message found in the Maildir at $dir into this inbox.
+# Dies if $dir lacks any of the cur/new/tmp subdirectories.  $self is
+# forwarded to the per-file callback only when a filter is configured,
+# so the callback can skip the filter lookup otherwise.
sub import_maildir {
my ($self, $dir) = @_;
- my $im = $self->importer(1);
-
foreach my $sub (qw(cur new tmp)) {
-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
}
- foreach my $sub (qw(cur new)) {
- opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
- while (defined(my $fn = readdir($dh))) {
- next unless is_maildir_basename($fn);
- my $mime = eml_from_path("$dir/$fn") or next;
-
- if (my $filter = $self->filter($im)) {
- my $ret = $filter->scrub($mime) or return;
- return if $ret == REJECT();
- $mime = $ret;
- }
- $im->add($mime);
- }
- }
+ my $im = $self->importer(1);
+ my @self = $self->filter($im) ? ($self) : ();
+ # the reader needs the Maildir path itself; without $dir it has
+ # nothing to walk and the callback would land in the wrong slot
+ PublicInbox::MdirReader::maildir_each_file($dir, \&_each_maildir_fn,
+ $im, @self);
$im->done;
}
delete @{$_[0]}{qw(over mm git search)};
}
-# warnings to ignore when handling spam mailboxes and maybe other places
-sub warn_ignore {
- my $s = "@_";
- # Email::Address::XS warnings
- $s =~ /^Argument contains empty address at /
- || $s =~ /^Element at index [0-9]+ contains /
- # PublicInbox::MsgTime
- || $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
- || $s =~ /^bad Date: .+? in /
- # Encode::Unicode::UTF7
- || $s =~ /^Bad UTF7 data escape at /
-}
-
-# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
-sub warn_ignore_cb {
- my $cb = $SIG{__WARN__} // sub { print STDERR @_ };
- sub {
- return if warn_ignore(@_);
- $cb->(@_);
- }
-}
-
# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
sub git_dir_latest {
my ($self, $max) = @_;