X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FInboxWritable.pm;h=17dfbe18500a6dfa00e87f50209baec4126f61c3;hb=HEAD;hp=982ad6e59d6afc43c265c0b4e454878985238094;hpb=af0b0fb7a454470a32c452119d0392e0dedb3fe1;p=public-inbox.git diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 982ad6e5..17dfbe18 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -9,7 +9,8 @@ use parent qw(PublicInbox::Inbox Exporter); use PublicInbox::Import; use PublicInbox::Filter::Base qw(REJECT); use Errno qw(ENOENT); -our @EXPORT_OK = qw(eml_from_path warn_ignore_cb); +our @EXPORT_OK = qw(eml_from_path); +use Fcntl qw(O_RDONLY O_NONBLOCK); use constant { PERM_UMASK => 0, @@ -46,7 +47,7 @@ sub _init_v1 { require PublicInbox::Msgmap; my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create $sidx->begin_txn_lazy; - my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1); + my $mm = PublicInbox::Msgmap->new_file($self, 1); if (defined $skip_artnum) { $mm->{dbh}->begin_work; $mm->skip_artnum($skip_artnum); @@ -118,25 +119,10 @@ sub filter { undef; } -sub is_maildir_basename ($) { - my ($bn) = @_; - return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; - if ($bn =~ /:2,([A-Z]+)\z/i) { - my $flags = $1; - return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail - } - 1; -} - -sub is_maildir_path ($) { - my ($path) = @_; - my @p = split(m!/+!, $path); - (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0; -} - sub eml_from_path ($) { my ($path) = @_; - if (open my $fh, '<', $path) { + if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) { + return unless -f $fh; # no FIFOs or directories my $str = do { local $/; <$fh> } or return; PublicInbox::Eml->new(\$str); } else { # ENOENT is common with Maildir @@ -145,73 +131,48 @@ sub eml_from_path ($) { } } +sub _each_maildir_eml { + my ($fn, $kw, $eml, $im, $self) = @_; + return if grep(/\Adraft\z/, @$kw); + if ($self && (my $filter = $self->filter($im))) { + my $ret = $filter->scrub($eml) or return; + return if $ret == REJECT(); + $eml = $ret; + } + $im->add($eml); +} + +# XXX does anybody use this? sub import_maildir { my ($self, $dir) = @_; - my $im = $self->importer(1); - foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } - foreach my $sub (qw(cur new)) { - opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; - while (defined(my $fn = readdir($dh))) { - next unless is_maildir_basename($fn); - my $mime = eml_from_path("$dir/$fn") or next; - - if (my $filter = $self->filter($im)) { - my $ret = $filter->scrub($mime) or return; - return if $ret == REJECT(); - $mime = $ret; - } - $im->add($mime); - } - } + my $im = $self->importer(1); + my @self = $self->filter($im) ? ($self) : (); + require PublicInbox::MdirReader; + PublicInbox::MdirReader->new->maildir_each_eml($dir, + \&_each_maildir_eml, $im, @self); $im->done; } -# asctime: From example@example.com Fri Jun 23 02:56:55 2000 -my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/; - -sub mb_add ($$$$) { - my ($im, $variant, $filter, $msg) = @_; - $$msg =~ s/(\r?\n)+\z/$1/s; - if ($variant eq 'mboxrd') { - $$msg =~ s/^>(>*From )/$1/gms; - } elsif ($variant eq 'mboxo') { - $$msg =~ s/^>From /From /gms; - } - my $mime = PublicInbox::Eml->new($msg); +sub _mbox_eml_cb { # MboxReader->mbox* callback + my ($eml, $im, $filter) = @_; if ($filter) { - my $ret = $filter->scrub($mime) or return; + my $ret = $filter->scrub($eml) or return; return if $ret == REJECT(); - $mime = $ret; + $eml = $ret; } - $im->add($mime) + $im->add($eml); } sub import_mbox { my ($self, $fh, $variant) = @_; - if ($variant !~ /\A(?:mboxrd|mboxo)\z/) { - die "variant must be 'mboxrd' or 'mboxo'\n"; - } + require PublicInbox::MboxReader; + my $cb = PublicInbox::MboxReader->reads($variant) or + die "$variant not supported\n"; my $im = $self->importer(1); - my $prev = undef; - my $msg = ''; - my $filter = $self->filter; - while (defined(my $l = <$fh>)) { - if ($l =~ /$from_strict/o) { - if (!defined($prev) || $prev =~ /^\r?$/) { - mb_add($im, $variant, $filter, \$msg) if $msg; - $msg = ''; - $prev = $l; - next; - } - warn "W[$.] $l\n"; - } - $prev = $l; - $msg .= $l; - } - mb_add($im, $variant, $filter, \$msg) if $msg; + $cb->(undef, $fh, \&_mbox_eml_cb, $im, $self->filter); $im->done; } @@ -277,28 +238,6 @@ sub cleanup ($) { delete @{$_[0]}{qw(over mm git search)}; } -# warnings to ignore when handling spam mailboxes and maybe other places -sub warn_ignore { - my $s = "@_"; - # Email::Address::XS warnings - $s =~ /^Argument contains empty address at / - || $s =~ /^Element at index [0-9]+ contains / - # PublicInbox::MsgTime - || $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/ - || $s =~ /^bad Date: .+? in / - # Encode::Unicode::UTF7 - || $s =~ /^Bad UTF7 data escape at / -} - -# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..." -sub warn_ignore_cb { - my $cb = $SIG{__WARN__} // \&CORE::warn; - sub { - return if warn_ignore(@_); - $cb->(@_); - } -} - # v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove sub git_dir_latest { my ($self, $max) = @_;