X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FMdirReader.pm;h=dbb74d6d9772f85afb5b9b0fc4b4b07e1b61ac60;hb=23af251dd607c4e75ab1e68063f2c885c48cc035;hp=1685e4d8ee2883f283ef606a478957fd85dc1ab2;hpb=883db1e765b9fd0af7ed50b8c5df5527f566f0ba;p=public-inbox.git diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm index 1685e4d8..dbb74d6d 100644 --- a/lib/PublicInbox/MdirReader.pm +++ b/lib/PublicInbox/MdirReader.pm @@ -8,6 +8,7 @@ package PublicInbox::MdirReader; use strict; use v5.10.1; use PublicInbox::InboxWritable qw(eml_from_path); +use Digest::SHA qw(sha256_hex); # returns Maildir flags from a basename ('' for no flags, undef for invalid) sub maildir_basename_flags { @@ -24,15 +25,27 @@ sub maildir_path_flags { $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef; } -sub maildir_each_file ($$;@) { - my ($dir, $cb, @arg) = @_; +sub shard_ok ($$$) { + my ($bn, $mod, $shard) = @_; + # can't get dirent.d_ino w/ pure Perl readdir, so we extract + # the OID if it looks like one instead of doing stat(2) + my $hex = $bn =~ m!\A([a-f0-9]{40,})! ? $1 : sha256_hex($bn); + my $recno = hex(substr($hex, 0, 8)); + ($recno % $mod) == $shard; +} + +sub maildir_each_file { + my ($self, $dir, $cb, @arg) = @_; $dir .= '/' unless substr($dir, -1) eq '/'; + my ($mod, $shard) = @{$self->{shard_info} // []}; for my $d (qw(new/ cur/)) { my $pfx = $dir.$d; opendir my $dh, $pfx or next; while (defined(my $bn = readdir($dh))) { - maildir_basename_flags($bn) // next; - $cb->($pfx.$bn, @arg); + my $fl = maildir_basename_flags($bn) // next; + next if defined($mod) && !shard_ok($bn, $mod, $shard); + next if index($fl, 'T') >= 0; # no Trashed messages + $cb->($pfx.$bn, $fl, @arg); } } } @@ -40,15 +53,20 @@ sub maildir_each_file ($$;@) { my %c2kw = ('D' => 'draft', F => 'flagged', P => 'forwarded', R => 'answered', S => 'seen'); -sub maildir_each_eml ($$;@) { - my ($dir, $cb, @arg) = @_; +sub maildir_each_eml { + my ($self, $dir, $cb, @arg) = @_; $dir .= '/' unless substr($dir, -1) eq '/'; + my ($mod, $shard) = @{$self->{shard_info} // []}; my $pfx = $dir . 'new/'; if (opendir(my $dh, $pfx)) { while (defined(my $bn = readdir($dh))) { next if substr($bn, 0, 1) eq '.'; my @f = split(/:/, $bn, -1); - next if scalar(@f) != 1; + + # mbsync and offlineimap both use "2," in "new/" + next if ($f[1] // '2,') ne '2,' || defined($f[2]); + + next if defined($mod) && !shard_ok($bn, $mod, $shard); my $f = $pfx.$bn; my $eml = eml_from_path($f) or next; $cb->($f, [], $eml, @arg); @@ -59,6 +77,7 @@ sub maildir_each_eml ($$;@) { while (defined(my $bn = readdir($dh))) { my $fl = maildir_basename_flags($bn) // next; next if index($fl, 'T') >= 0; + next if defined($mod) && !shard_ok($bn, $mod, $shard); my $f = $pfx.$bn; my $eml = eml_from_path($f) or next; my @kw = sort(map { $c2kw{$_} // () } split(//, $fl)); @@ -66,4 +85,24 @@ sub maildir_each_eml ($$;@) { } } +sub new { bless {}, __PACKAGE__ } + +sub flags2kw ($) { + if (wantarray) { + my @unknown; + my %kw; + for (split(//, $_[0])) { + my $k = $c2kw{$_}; + if (defined($k)) { + $kw{$k} = 1; + } else { + push @unknown, $_; + } + } + (\%kw, \@unknown); + } else { + [ sort(map { $c2kw{$_} // () } split(//, $_[0])) ]; + } +} + 1;