X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLeiToMail.pm;h=f0adc44f23e6088b13e40d43b01fec9fa1705c7d;hb=63283ae1b51203c930332e6887296cb123e5db6c;hp=cea68319cd309d4c4152db8fe292dee3efd65fd7;hpb=5c46247509080a4c0d6eb3db56ec62bfab29e76e;p=public-inbox.git diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index cea68319..f0adc44f 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -12,11 +12,17 @@ use PublicInbox::ProcessPipe; use PublicInbox::Spawn qw(which spawn popen_rd); use PublicInbox::LeiDedupe; use PublicInbox::OnDestroy; +use PublicInbox::Git; +use PublicInbox::GitAsyncCat; use Symbol qw(gensym); use IO::Handle; # ->autoflush use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY); -use Errno qw(EEXIST ESPIPE ENOENT); -use PublicInbox::Git; +use Errno qw(EEXIST ESPIPE ENOENT EPIPE); +my ($maildir_each_file); + +# struggles with short-lived repos, Gcf2Client makes little sense with lei; +# but we may use in-process libgit2 in the future. +$PublicInbox::GitAsyncCat::GCF2C = 0; my %kw2char = ( # Maildir characters draft => 'D', @@ -63,14 +69,16 @@ sub _mbox_hdr_buf ($$$) { } sub atomic_append { # for on-disk destinations (O_APPEND, or O_EXCL) - my ($fh, $buf) = @_; - defined(my $w = syswrite($fh, $$buf)) or die "write: $!"; - $w == length($$buf) or die "short write: $w != ".length($$buf); -} - -sub _print_full { - my ($fh, $buf) = @_; - print $fh $$buf or die "print: $!"; + my ($lei, $buf) = @_; + if (defined(my $w = syswrite($lei->{1} // return, $$buf))) { + return if $w == length($$buf); + $buf = "short atomic write: $w != ".length($$buf); + } elsif ($! == EPIPE) { + return $lei->note_sigpipe(1); + } else { + $buf = "atomic write: $!"; + } + $lei->fail($buf); } sub eml2mboxrd ($;$) { @@ -204,10 +212,10 @@ sub zsfx2cmd ($$$) { } sub _post_augment_mbox { # open a compressor process - my ($self, $lei, $zpipe) = @_; + my ($self, $lei) = @_; my $zsfx = $self->{zsfx} or return; my $cmd = zsfx2cmd($zsfx, undef, $lei); - my ($r, $w) = splice(@$zpipe, 0, 2); + my ($r, $w) = @{delete $lei->{zpipe}}; my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2} }; my $pid = spawn($cmd, $lei->{env}, $rdr); my $pp = gensym; @@ -227,9 +235,7 @@ sub decompress_src ($$$) { sub dup_src ($) { my ($in) = @_; - # fileno needed because wq_set_recv_modes only used ">&=" for {1} - # and Perl blindly trusts that to reject the '+' (readability flag) - open my $dup, '+>>&=', fileno($in) or die "dup: $!"; + open my $dup, '+>>&', $in or die "dup: $!"; $dup; } @@ -245,60 +251,49 @@ sub _mbox_write_cb ($$) { my $ovv = $lei->{ovv}; my $m = 'eml2'.$ovv->{fmt}; my $eml2mbox = $self->can($m) or die "$self->$m missing"; - my $out = $lei->{1} // die "no stdout ($m, $ovv->{dst})"; # redirected earlier - $out->autoflush(1); - my $write = $ovv->{lock_path} ? \&_print_full : \&atomic_append; + $lei->{1} // die "no stdout ($m, $ovv->{dst})"; # redirected earlier + $lei->{1}->autoflush(1); + my $atomic_append = !defined($ovv->{lock_path}); my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe; sub { # for git_to_mail - my ($buf, $smsg) = @_; - return unless $out; - my $eml = PublicInbox::Eml->new($buf); - if (!$dedupe->is_dup($eml, $smsg->{blob})) { - $buf = $eml2mbox->($eml, $smsg); - my $lk = $ovv->lock_for_scope; - eval { $write->($out, $buf) }; - if ($@) { - die $@ if ref($@) ne 'PublicInbox::SIGPIPE'; - undef $out - } - } - } -} - -sub _maildir_each_file ($$;@) { - my ($dir, $cb, @arg) = @_; - for my $d (qw(new/ cur/)) { - my $pfx = $dir.$d; - opendir my $dh, $pfx or next; - while (defined(my $fn = readdir($dh))) { - $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; - } + my ($buf, $smsg, $eml) = @_; + $eml //= PublicInbox::Eml->new($buf); + return if $dedupe->is_dup($eml, $smsg->{blob}); + $buf = $eml2mbox->($eml, $smsg); + return atomic_append($lei, $buf) if $atomic_append; + my $lk = $ovv->lock_for_scope; + $lei->out($$buf); } } -sub _augment_file { # _maildir_each_file cb +sub _augment_file { # maildir_each_file cb my ($f, $lei) = @_; my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return; _augment($eml, $lei); } -# _maildir_each_file callback, \&CORE::unlink doesn't work with it +# maildir_each_file callback, \&CORE::unlink doesn't work with it sub _unlink { unlink($_[0]) } +sub _rand () { + state $seq = 0; + sprintf('%x,%x,%x,%x', rand(0xffffffff), time, $$, ++$seq); +} + sub _buf2maildir { my ($dst, $buf, $smsg) = @_; my $kw = $smsg->{kw} // []; my $sfx = join('', sort(map { $kw2char{$_} // () } @$kw)); my $rand = ''; # chosen by die roll :P - my ($tmp, $fh, $final); - my $common = $smsg->{blob}; + my ($tmp, $fh, $final, $ok); + my $common = $smsg->{blob} // _rand; if (defined(my $pct = $smsg->{pct})) { $common .= "=$pct" } do { $tmp = $dst.'tmp/'.$rand.$common; - } while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY) && - $! == EEXIST && ($rand = int(rand 0x7fffffff).',')); - if (print $fh $$buf and close($fh)) { + } while (!($ok = sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY)) && + $! == EEXIST && ($rand = _rand.',')); + if ($ok && print $fh $$buf and close($fh)) { # ignore new/ and write only to cur/, otherwise MUAs # with R/W access to the Maildir will end up doing # a mass rename which can take a while with thousands @@ -308,24 +303,27 @@ sub _buf2maildir { do { $final = $dst.$rand.$common.':2,'.$sfx; } while (!link($tmp, $final) && $! == EEXIST && - ($rand = int(rand 0x7fffffff).',')); + ($rand = _rand.',')); unlink($tmp) or warn "W: failed to unlink $tmp: $!\n"; } else { - my $err = $!; + my $err = "Error writing $smsg->{blob} to $dst: $!\n"; + $_[0] = undef; # clobber dst unlink($tmp); - die "Error writing $smsg->{blob} to $dst: $err"; + die $err; } } sub _maildir_write_cb ($$) { my ($self, $lei) = @_; my $dedupe = $lei->{dedupe}; - $dedupe->prepare_dedupe; + $dedupe->prepare_dedupe if $dedupe; my $dst = $lei->{ovv}->{dst}; sub { # for git_to_mail - my ($buf, $smsg) = @_; + my ($buf, $smsg, $eml) = @_; + $dst // return $lei->fail; # dst may be undef-ed in last run + $buf //= \($eml->as_string); return _buf2maildir($dst, $buf, $smsg) if !$dedupe; - my $eml = PublicInbox::Eml->new($$buf); # copy buf + $eml //= PublicInbox::Eml->new($$buf); # copy buf return if $dedupe->is_dup($eml, $smsg->{blob}); undef $eml; _buf2maildir($dst, $buf, $smsg); @@ -345,11 +343,18 @@ sub new { my $dst = $lei->{ovv}->{dst}; my $self = bless {}, $cls; if ($fmt eq 'maildir') { + $maildir_each_file //= do { + require PublicInbox::MdirReader; + PublicInbox::MdirReader->can('maildir_each_file'); + }; + $lei->{opt}->{augment} and + require PublicInbox::InboxWritable; # eml_from_path $self->{base_type} = 'maildir'; -e $dst && !-d _ and die "$dst exists and is not a directory\n"; $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/'; } elsif (substr($fmt, 0, 4) eq 'mbox') { + require PublicInbox::MboxReader if $lei->{opt}->{augment}; (-d $dst || (-e _ && !-w _)) and die "$dst exists and is not a writable file\n"; $self->can("eml2$fmt") or die "bad mbox --format=$fmt\n"; @@ -357,11 +362,22 @@ sub new { } else { die "bad mail --format=$fmt\n"; } + $self->{dst} = $dst; $lei->{dedupe} = PublicInbox::LeiDedupe->new($lei); $self; } -sub _pre_augment_maildir {} # noop +sub _pre_augment_maildir { + my ($self, $lei) = @_; + my $dst = $lei->{ovv}->{dst}; + for my $x (qw(tmp new cur)) { + my $d = $dst.$x; + next if -d $d; + require File::Path; + File::Path::mkpath($d); + -d $d or die "$d is not a directory"; + } +} sub _do_augment_maildir { my ($self, $lei) = @_; @@ -369,26 +385,15 @@ sub _do_augment_maildir { if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { - require PublicInbox::InboxWritable; # eml_from_path - _maildir_each_file($dst, \&_augment_file, $lei); + $maildir_each_file->($dst, \&_augment_file, $lei); $dedupe->pause_dedupe; } } else { # clobber existing Maildir - _maildir_each_file($dst, \&_unlink); + $maildir_each_file->($dst, \&_unlink); } } -sub _post_augment_maildir { - my ($self, $lei) = @_; - my $dst = $lei->{ovv}->{dst}; - for my $x (qw(tmp new cur)) { - my $d = $dst.$x; - next if -d $d; - require File::Path; - File::Path::mkpath($d); - -d $d or die "$d is not a directory"; - } -} +sub _post_augment_maildir {} # noop sub _pre_augment_mbox { my ($self, $lei) = @_; @@ -399,6 +404,7 @@ sub _pre_augment_mbox { $! == ENOENT or die "unlink($dst): $!"; } open my $out, $mode, $dst or die "open($dst): $!"; + $lei->{old_1} = $lei->{1}; # keep for spawning MUA $lei->{1} = $out; } # Perl does SEEK_END even with O_APPEND :< @@ -409,7 +415,7 @@ sub _pre_augment_mbox { state $zsfx_allow = join('|', keys %zsfx2cmd); ($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/) or return; pipe(my ($r, $w)) or die "pipe: $!"; - [ $r, $w ]; + $lei->{zpipe} = [ $r, $w ]; } sub _do_augment_mbox { @@ -424,7 +430,6 @@ sub _do_augment_mbox { my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) : dup_src($out); my $fmt = $lei->{ovv}->{fmt}; - require PublicInbox::MboxReader; PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei); } # maybe some systems don't honor O_APPEND, Perl does this: @@ -432,7 +437,7 @@ sub _do_augment_mbox { $dedupe->pause_dedupe if $dedupe; } -sub pre_augment { # fast (1 disk seek), runs in main daemon +sub pre_augment { # fast (1 disk seek), runs in same process as post_augment my ($self, $lei) = @_; # _pre_augment_maildir, _pre_augment_mbox my $m = "_pre_augment_$self->{base_type}"; @@ -446,48 +451,53 @@ sub do_augment { # slow, runs in wq worker $self->$m($lei); } -sub post_augment { # fast (spawn compressor or mkdir), runs in main daemon +# fast (spawn compressor or mkdir), runs in same process as pre_augment +sub post_augment { my ($self, $lei, @args) = @_; # _post_augment_maildir, _post_augment_mbox my $m = "_post_augment_$self->{base_type}"; $self->$m($lei, @args); } -sub write_mail { # via ->wq_do - my ($self, $git_dir, $smsg, $lei) = @_; - my $not_done = delete $self->{4}; # write end of {each_smsg_done} - my $wcb = $self->{wcb} //= do { # first message - my %sig = $lei->atfork_child_wq($self); - @SIG{keys %sig} = values %sig; # not local - $lei->{dedupe}->prepare_dedupe; - $self->write_cb($lei); - }; - my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir); - $git->cat_async($smsg->{blob}, \&git_to_mail, [$wcb, $smsg, $not_done]); +sub ipc_atfork_child { + my ($self) = @_; + my $lei = delete $self->{lei}; + $lei->lei_atfork_child; + if (my $zpipe = delete $lei->{zpipe}) { + $lei->{1} = $zpipe->[1]; + close $zpipe->[0]; + } + $self->{wcb} = $self->write_cb($lei); + $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); + $self->SUPER::ipc_atfork_child; } -sub ipc_atfork_prepare { - my ($self) = @_; - # (done_wr, stdout|mbox, stderr, 3: sock, 4: each_smsg_done_wr) - $self->wq_set_recv_modes(qw[+<&= >&= >&= +<&= >&=]); - $self->SUPER::ipc_atfork_prepare; # PublicInbox::IPC +sub lock_free { + $_[0]->{base_type} =~ /\A(?:maildir|mh|imap|jmap)\z/ ? 1 : 0; } -# We rely on OnDestroy to run this before ->DESTROY, since ->DESTROY -# ordering is unstable at worker exit and may cause segfaults -sub reap_gits { +sub poke_dst { my ($self) = @_; - for my $git (delete @$self{grep(/\A$$\0/, keys %$self)}) { - $git->async_wait_all; + if ($self->{base_type} eq 'maildir') { + my $t = time + 1; + utime($t, $t, "$self->{dst}/cur"); } } -sub ipc_atfork_child { # runs after IPC::wq_worker_loop +sub write_mail { # via ->wq_io_do + my ($self, $git_dir, $smsg) = @_; + my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir); + git_async_cat($git, $smsg->{blob}, \&git_to_mail, + [$self->{wcb}, $smsg]); +} + +sub wq_atexit_child { my ($self) = @_; - $self->SUPER::ipc_atfork_child; - # reap_gits needs to run before $self->DESTROY, - # IPC.pm will ensure that. - PublicInbox::OnDestroy->new($$, \&reap_gits, $self); + delete $self->{wcb}; + for my $git (delete @$self{grep(/\A$$\0/, keys %$self)}) { + $git->async_wait_all; + } + $SIG{__WARN__} = 'DEFAULT'; } 1;