X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FMboxReader.pm;h=beffabe8e55c3160a14afb74dfc3b3276c7a94cc;hb=2191edf5e099363a860e27d9e1b0f98ef51c6e59;hp=f93c2ec634496d8c4ba8d4a7bfde0c8b9ab260a3;hpb=327a4c0a44ab301922a2b2829d423d2e9fc18faf;p=public-inbox.git diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm index f93c2ec6..beffabe8 100644 --- a/lib/PublicInbox/MboxReader.pm +++ b/lib/PublicInbox/MboxReader.pm @@ -41,7 +41,7 @@ sub _mbox_from { $raw =~ s/^\r?\n\z//ms; $raw =~ s/$from_re/$1/gms; my $eml = PublicInbox::Eml->new(\$raw); - $eml_cb->($eml, @arg); + $eml_cb->($eml, @arg) if $eml->raw_size; } return if $r == 0; # EOF } @@ -96,6 +96,7 @@ sub _mbox_cl ($$$;@) { $$hdr =~ s/\A[\r\n]*From [^\n]*\n//s or die "E: no 'From ' line in:\n", Dumper($hdr); my $eml = PublicInbox::Eml->new($hdr); + next unless $eml->raw_size; my @cl = $eml->header_raw('Content-Length'); my $n = scalar(@cl); $n == 0 and die "E: Content-Length missing in:\n", @@ -133,4 +134,75 @@ sub mboxcl2 { sub new { bless \(my $x), __PACKAGE__ } +sub reads { + my $ifmt = $_[-1]; + $ifmt =~ /\Ambox(?:rd|cl|cl2|o)\z/ ? __PACKAGE__->can($ifmt) : undef +} + +# all of these support -c for stdout and -d for decompression, +# mutt is commonly distributed with hooks for gz, bz2 and xz, at least +# { foo => '' } means "--foo" is passed to the command-line, +# otherwise { foo => '--bar' } passes "--bar" +my %zsfx2cmd = ( + gz => [ qw(GZIP pigz gzip) ], + bz2 => [ 'bzip2', {} ], + xz => [ 'xz', {} ], + # don't add new entries here unless MUA support is widely available +); + +sub zsfx ($) { + my ($pathname) = @_; + my $allow = join('|', keys %zsfx2cmd); + $pathname =~ /\.($allow)\z/ ? $1 : undef; +} + +sub zsfx2cmd ($$$) { + my ($zsfx, $decompress, $lei) = @_; + my $x = $zsfx2cmd{$zsfx} // die "BUG: no support for suffix=.$zsfx"; + my @info = @$x; + my $cmd_opt = ref($info[-1]) ? pop(@info) : undef; + my @cmd = (undef, $decompress ? qw(-dc) : qw(-c)); + require PublicInbox::Spawn; + for my $exe (@info) { + # I think respecting client's ENV{GZIP} is OK, not sure + # about ENV overrides for other, less-common compressors + if ($exe eq uc($exe)) { + $exe = $lei->{env}->{$exe} or next; + } + $cmd[0] = PublicInbox::Spawn::which($exe) and last; + } + $cmd[0] // die join(' or ', @info)." missing for .$zsfx"; + + # not all gzip support --rsyncable, FreeBSD gzip doesn't even exit + # with an error code + if (!$decompress && $cmd[0] =~ m!/gzip\z! && !defined($cmd_opt)) { + pipe(my ($r, $w)) or die "pipe: $!"; + open my $null, '+>', '/dev/null' or die "open: $!"; + my $rdr = { 0 => $null, 1 => $null, 2 => $w }; + my $tst = [ $cmd[0], '--rsyncable' ]; + my $pid = PublicInbox::Spawn::spawn($tst, undef, $rdr); + close $w; + my $err = do { local $/; <$r> }; + waitpid($pid, 0) == $pid or die "BUG: waitpid: $!"; + $cmd_opt = $err ? {} : { rsyncable => '' }; + push(@$x, $cmd_opt); + } + for my $bool (keys %$cmd_opt) { + my $switch = $cmd_opt->{$bool} // next; + push @cmd, '--'.($switch || $bool); + } + for my $key (qw(rsyncable)) { # support compression level? + my $switch = $cmd_opt->{$key} // next; + my $val = $lei->{opt}->{$key} // next; + push @cmd, $switch, $val; + } + \@cmd; +} + +sub zsfxcat ($$$) { + my ($in, $zsfx, $lei) = @_; + my $cmd = zsfx2cmd($zsfx, 1, $lei); + PublicInbox::Spawn::popen_rd($cmd, undef, { 0 => $in, 2 => $lei->{2} }); +} + 1;