X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FMboxReader.pm;h=beffabe8e55c3160a14afb74dfc3b3276c7a94cc;hb=2191edf5e099363a860e27d9e1b0f98ef51c6e59;hp=ac0c0f52deccc9b1a4aaf8f0dd73fee5ba889050;hpb=7f17df5c6f1892ef53f149a0ab24a5d917cce7d9;p=public-inbox.git diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm index ac0c0f52..beffabe8 100644 --- a/lib/PublicInbox/MboxReader.pm +++ b/lib/PublicInbox/MboxReader.pm @@ -1,20 +1,30 @@ -# Copyright (C) 2020 all contributors +# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # reader for mbox variants we support package PublicInbox::MboxReader; use strict; use v5.10.1; -use PublicInbox::DS (); # localize $in_loop for error detection :< use Data::Dumper; $Data::Dumper::Useqq = 1; # should've been the default, for bad data my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^\n:]+:[^\n:]+:[^\n:]+ [^\n:]+\n/sm; +# cf: https://doc.dovecot.org/configuration_manual/mail_location/mbox/ +my %status2kw = (F => 'flagged', A => 'answered', R => 'seen', T => 'draft'); +# O (old/non-recent), and D (deleted) aren't in JMAP, +# so probably won't be supported by us. +sub mbox_keywords { + my $eml = $_[-1]; + my $s = "@{[$eml->header_raw('X-Status'),$eml->header_raw('Status')]}"; + my %kw; + $s =~ s/([FART])/$kw{$status2kw{$1}} = 1/sge; + [ sort(keys %kw) ]; +} + sub _mbox_from { my ($mbfh, $from_re, $eml_cb, @arg) = @_; - local $PublicInbox::DS::in_loop; # disable dwaitpid my $buf = ''; my @raw; while (defined(my $r = read($mbfh, $buf, 65536, length($buf)))) { @@ -28,10 +38,10 @@ sub _mbox_from { } @raw = grep /[^ \t\r\n]/s, @raw; # skip empty messages while (defined(my $raw = shift @raw)) { - $raw =~ s/\r?\n\z//s; + $raw =~ s/^\r?\n\z//ms; $raw =~ s/$from_re/$1/gms; my $eml = PublicInbox::Eml->new(\$raw); - $eml_cb->($eml, @arg); + $eml_cb->($eml, @arg) if $eml->raw_size; } return if $r == 0; # EOF } @@ -75,7 +85,6 @@ sub _extract_hdr { sub _mbox_cl ($$$;@) { my ($mbfh, $uxs_from, $eml_cb, @arg) = @_; - local $PublicInbox::DS::in_loop; # disable dwaitpid my $buf = ''; while (defined(my $r = read($mbfh, $buf, 65536, length($buf)))) { if ($r == 0) { # detect "curl --fail" @@ -87,6 +96,7 @@ sub _mbox_cl ($$$;@) { $$hdr =~ s/\A[\r\n]*From [^\n]*\n//s or die "E: no 'From ' line in:\n", Dumper($hdr); my $eml = PublicInbox::Eml->new($hdr); + next unless $eml->raw_size; my @cl = $eml->header_raw('Content-Length'); my $n = scalar(@cl); $n == 0 and die "E: Content-Length missing in:\n", @@ -124,4 +134,75 @@ sub mboxcl2 { sub new { bless \(my $x), __PACKAGE__ } +sub reads { + my $ifmt = $_[-1]; + $ifmt =~ /\Ambox(?:rd|cl|cl2|o)\z/ ? __PACKAGE__->can($ifmt) : undef +} + +# all of these support -c for stdout and -d for decompression, +# mutt is commonly distributed with hooks for gz, bz2 and xz, at least +# { foo => '' } means "--foo" is passed to the command-line, +# otherwise { foo => '--bar' } passes "--bar" +my %zsfx2cmd = ( + gz => [ qw(GZIP pigz gzip) ], + bz2 => [ 'bzip2', {} ], + xz => [ 'xz', {} ], + # don't add new entries here unless MUA support is widely available +); + +sub zsfx ($) { + my ($pathname) = @_; + my $allow = join('|', keys %zsfx2cmd); + $pathname =~ /\.($allow)\z/ ? $1 : undef; +} + +sub zsfx2cmd ($$$) { + my ($zsfx, $decompress, $lei) = @_; + my $x = $zsfx2cmd{$zsfx} // die "BUG: no support for suffix=.$zsfx"; + my @info = @$x; + my $cmd_opt = ref($info[-1]) ? pop(@info) : undef; + my @cmd = (undef, $decompress ? qw(-dc) : qw(-c)); + require PublicInbox::Spawn; + for my $exe (@info) { + # I think respecting client's ENV{GZIP} is OK, not sure + # about ENV overrides for other, less-common compressors + if ($exe eq uc($exe)) { + $exe = $lei->{env}->{$exe} or next; + } + $cmd[0] = PublicInbox::Spawn::which($exe) and last; + } + $cmd[0] // die join(' or ', @info)." missing for .$zsfx"; + + # not all gzip support --rsyncable, FreeBSD gzip doesn't even exit + # with an error code + if (!$decompress && $cmd[0] =~ m!/gzip\z! && !defined($cmd_opt)) { + pipe(my ($r, $w)) or die "pipe: $!"; + open my $null, '+>', '/dev/null' or die "open: $!"; + my $rdr = { 0 => $null, 1 => $null, 2 => $w }; + my $tst = [ $cmd[0], '--rsyncable' ]; + my $pid = PublicInbox::Spawn::spawn($tst, undef, $rdr); + close $w; + my $err = do { local $/; <$r> }; + waitpid($pid, 0) == $pid or die "BUG: waitpid: $!"; + $cmd_opt = $err ? {} : { rsyncable => '' }; + push(@$x, $cmd_opt); + } + for my $bool (keys %$cmd_opt) { + my $switch = $cmd_opt->{$bool} // next; + push @cmd, '--'.($switch || $bool); + } + for my $key (qw(rsyncable)) { # support compression level? + my $switch = $cmd_opt->{$key} // next; + my $val = $lei->{opt}->{$key} // next; + push @cmd, $switch, $val; + } + \@cmd; +} + +sub zsfxcat ($$$) { + my ($in, $zsfx, $lei) = @_; + my $cmd = zsfx2cmd($zsfx, 1, $lei); + PublicInbox::Spawn::popen_rd($cmd, undef, { 0 => $in, 2 => $lei->{2} }); +} + 1;