1 # Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # Maildirs for now, MH eventually
5 # ref: https://cr.yp.to/proto/maildir.html
6 # https://wiki2.dovecot.org/MailboxFormat/Maildir
7 package PublicInbox::MdirReader;
10 use PublicInbox::InboxWritable qw(eml_from_path);
11 use Digest::SHA qw(sha256_hex);
13 # returns Maildir flags from a basename ('' for no flags, undef for invalid)
14 sub maildir_basename_flags {
15 my (@f) = split(/:/, $_[0], -1);
16 return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.');
17 $f[1] // return ''; # "new"
18 $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur"
21 # same as above, but for full path name
22 sub maildir_path_flags {
24 my $i = rindex($f, '/');
25 $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
29 my ($bn, $mod, $shard) = @_;
30 # can't get dirent.d_ino w/ pure Perl readdir, so we extract
31 # the OID if it looks like one instead of doing stat(2)
32 my $hex = $bn =~ m!\A([a-f0-9]{40,})! ? $1 : sha256_hex($bn);
33 my $recno = hex(substr($hex, 0, 8));
34 ($recno % $mod) == $shard;
37 sub maildir_each_file {
38 my ($self, $dir, $cb, @arg) = @_;
39 $dir .= '/' unless substr($dir, -1) eq '/';
40 my ($mod, $shard) = @{$self->{shard_info} // []};
41 for my $d (qw(new/ cur/)) {
43 opendir my $dh, $pfx or next;
44 while (defined(my $bn = readdir($dh))) {
45 my $fl = maildir_basename_flags($bn) // next;
46 next if defined($mod) && !shard_ok($bn, $mod, $shard);
47 next if index($fl, 'T') >= 0; # no Trashed messages
48 $cb->($pfx.$bn, $fl, @arg);
53 my %c2kw = ('D' => 'draft', F => 'flagged', P => 'forwarded',
54 R => 'answered', S => 'seen');
56 sub maildir_each_eml {
57 my ($self, $dir, $cb, @arg) = @_;
58 $dir .= '/' unless substr($dir, -1) eq '/';
59 my ($mod, $shard) = @{$self->{shard_info} // []};
60 my $pfx = $dir . 'new/';
61 if (opendir(my $dh, $pfx)) {
62 while (defined(my $bn = readdir($dh))) {
63 next if substr($bn, 0, 1) eq '.';
64 my @f = split(/:/, $bn, -1);
66 # mbsync and offlineimap both use "2," in "new/"
67 next if ($f[1] // '2,') ne '2,' || defined($f[2]);
69 next if defined($mod) && !shard_ok($bn, $mod, $shard);
71 my $eml = eml_from_path($f) or next;
72 $cb->($f, [], $eml, @arg);
76 opendir my $dh, $pfx or return;
77 while (defined(my $bn = readdir($dh))) {
78 my $fl = maildir_basename_flags($bn) // next;
79 next if index($fl, 'T') >= 0;
80 next if defined($mod) && !shard_ok($bn, $mod, $shard);
82 my $eml = eml_from_path($f) or next;
83 my @kw = sort(map { $c2kw{$_} // () } split(//, $fl));
84 $cb->($f, \@kw, $eml, @arg);
88 sub new { bless {}, __PACKAGE__ }
94 for (split(//, $_[0])) {
104 [ sort(map { $c2kw{$_} // () } split(//, $_[0])) ];