my $im = $self->importer(1);
my @self = $self->filter($im) ? ($self) : ();
require PublicInbox::MdirReader;
- PublicInbox::MdirReader::maildir_each_file(\&_each_maildir_fn,
- $im, @self);
+ PublicInbox::MdirReader->new->maildir_each_file(\&_each_maildir_fn,
+ $im, @self);
$im->done;
}
return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
$input appears to a be a maildir, not $ifmt
EOM
- PublicInbox::MdirReader::maildir_each_eml($input,
+ PublicInbox::MdirReader->new->maildir_each_eml($input,
$self->can('input_maildir_cb'),
$self, @args);
} else {
use Symbol qw(gensym);
use IO::Handle; # ->autoflush
use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
-use Digest::SHA qw(sha256_hex);
my %kw2char = ( # Maildir characters
draft => 'D',
}
}
-sub _augment_or_unlink { # maildir_each_eml cb
- my ($f, $kw, $eml, $lei, $lse, $mod, $shard, $unlink) = @_;
- if ($mod) {
- # can't get dirent.d_ino w/ pure Perl readdir, so we extract
- # the OID if it looks like one instead of doing stat(2)
- my $hex = $f =~ m!\b([a-f0-9]{40,})[^/]*\z! ?
- $1 : sha256_hex($f);
- my $recno = hex(substr($hex, 0, 8));
- return if ($recno % $mod) != $shard;
- update_kw_maybe($lei, $lse, $eml, $kw);
- }
+sub _md_update { # maildir_each_eml cb: keyword update, then unlink or augment
+ my ($f, $kw, $eml, $lei, $lse, $unlink) = @_; # $lei, $lse, $unlink are the extra args given to maildir_each_eml
+ update_kw_maybe($lei, $lse, $eml, $kw); # $lse is undef unless the import-before option is set
$unlink ? unlink($f) : _augment($eml, $lei);
}
my ($self, $lei) = @_;
my $dst = $lei->{ovv}->{dst};
my $lse = $lei->{opt}->{'import-before'} ? $lei->{lse} : undef;
- my ($mod, $shard) = @{$self->{shard_info} // []};
+ my $mdr = PublicInbox::MdirReader->new;
if ($lei->{opt}->{augment}) {
my $dedupe = $lei->{dedupe};
if ($dedupe && $dedupe->prepare_dedupe) {
- PublicInbox::MdirReader::maildir_each_eml($dst,
- \&_augment_or_unlink,
- $lei, $lse, $mod, $shard);
+ $mdr->{shard_info} = $self->{shard_info};
+ $mdr->maildir_each_eml($dst, \&_md_update, $lei, $lse);
$dedupe->pause_dedupe;
}
} elsif ($lse) {
- PublicInbox::MdirReader::maildir_each_eml($dst,
- \&_augment_or_unlink,
- $lei, $lse, $mod, $shard, 1);
+ $mdr->{shard_info} = $self->{shard_info};
+ $mdr->maildir_each_eml($dst, \&_md_update, $lei, $lse, 1);
} else {# clobber existing Maildir
- PublicInbox::MdirReader::maildir_each_file($dst, \&_unlink);
+ $mdr->maildir_each_file($dst, \&_unlink);
}
}
use strict;
use v5.10.1;
use PublicInbox::InboxWritable qw(eml_from_path);
+use Digest::SHA qw(sha256_hex);
# returns Maildir flags from a basename ('' for no flags, undef for invalid)
sub maildir_basename_flags {
$i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
}
-sub maildir_each_file ($$;@) {
- my ($dir, $cb, @arg) = @_;
+sub shard_ok ($$$) {
+	my ($bn, $mod, $shard) = @_;
+	# Decide whether basename $bn belongs to shard number $shard out
+	# of $mod shards.  Pure Perl readdir does not expose dirent.d_ino
+	# and stat(2) is avoided, so the shard key is taken from the name:
+	# a leading run of 40+ lowercase hex digits (looks like an OID) is
+	# used as-is, any other name is hashed with SHA-256 first.
+	my $key;
+	if ($bn =~ m!\A([a-f0-9]{40,})!) {
+		$key = $1;
+	} else {
+		$key = sha256_hex($bn);
+	}
+	# only the first 8 hex digits take part in the modulus
+	my $n = hex(substr($key, 0, 8));
+	return ($n % $mod) == $shard;
+}
+
+sub maildir_each_file {
+ my ($self, $dir, $cb, @arg) = @_;
$dir .= '/' unless substr($dir, -1) eq '/';
+ my ($mod, $shard) = @{$self->{shard_info} // []};
for my $d (qw(new/ cur/)) {
my $pfx = $dir.$d;
opendir my $dh, $pfx or next;
while (defined(my $bn = readdir($dh))) {
maildir_basename_flags($bn) // next;
+ next if defined($mod) && !shard_ok($bn, $mod, $shard);
$cb->($pfx.$bn, @arg);
}
}
my %c2kw = ('D' => 'draft', F => 'flagged', P => 'forwarded',
R => 'answered', S => 'seen');
-sub maildir_each_eml ($$;@) {
- my ($dir, $cb, @arg) = @_;
+sub maildir_each_eml {
+ my ($self, $dir, $cb, @arg) = @_;
$dir .= '/' unless substr($dir, -1) eq '/';
+ my ($mod, $shard) = @{$self->{shard_info} // []};
my $pfx = $dir . 'new/';
if (opendir(my $dh, $pfx)) {
while (defined(my $bn = readdir($dh))) {
next if substr($bn, 0, 1) eq '.';
my @f = split(/:/, $bn, -1);
next if scalar(@f) != 1;
+ next if defined($mod) && !shard_ok($bn, $mod, $shard);
my $f = $pfx.$bn;
my $eml = eml_from_path($f) or next;
$cb->($f, [], $eml, @arg);
while (defined(my $bn = readdir($dh))) {
my $fl = maildir_basename_flags($bn) // next;
next if index($fl, 'T') >= 0;
+ next if defined($mod) && !shard_ok($bn, $mod, $shard);
my $f = $pfx.$bn;
my $eml = eml_from_path($f) or next;
my @kw = sort(map { $c2kw{$_} // () } split(//, $fl));
}
}
+# Constructor: returns an empty blessed hashref.  Callers may set
+# {shard_info} on the result to restrict iteration to one shard.
+# The object is blessed into the invocant's class so that subclasses
+# get instances of their own class; calling it on an existing object
+# or as a plain function still yields an object of a usable class.
+sub new {
+	my ($class) = @_;
+	bless {}, (ref($class) || $class || __PACKAGE__);
+}
+
1;
lei_ok('convert', '-o', "$d/md", "mboxrd:$d/foo.mboxrd");
ok(-d "$d/md", 'Maildir created');
my @md;
- PublicInbox::MdirReader::maildir_each_eml("$d/md", sub {
+ PublicInbox::MdirReader->new->maildir_each_eml("$d/md", sub {
push @md, $_[2];
});
is(scalar(@md), scalar(@mboxrd), 'got expected emails in Maildir') or
}
{ # Maildir support
- my $each_file = PublicInbox::MdirReader->can('maildir_each_file');
+ my $mdr = PublicInbox::MdirReader->new;
my $md = "$tmpdir/maildir/";
my $wcb = $wcb_get->('maildir', $md);
is(ref($wcb), 'CODE', 'got Maildir callback');
$wcb->(\(my $x = $buf), $b4dc0ffee);
my @f;
- $each_file->($md, sub { push @f, shift });
+ $mdr->maildir_each_file($md, sub { push @f, shift });
open my $fh, $f[0] or BAIL_OUT $!;
is(do { local $/; <$fh> }, $buf, 'wrote to Maildir');
$wcb->(\($x = $buf."\nx\n"), $deadcafe);
my @x = ();
- $each_file->($md, sub { push @x, shift });
+ $mdr->maildir_each_file($md, sub { push @x, shift });
is(scalar(@x), 1, 'wrote one new file');
ok(!-f $f[0], 'old file clobbered');
open $fh, $x[0] or BAIL_OUT $!;
$wcb->(\($x = $buf."\ny\n"), $deadcafe);
$wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe
@f = ();
- $each_file->($md, sub { push @f, shift });
+ $mdr->maildir_each_file($md, sub { push @f, shift });
is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there');
my @new = grep(!/\A\Q$x[0]\E\z/, @f);
is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');