From 26e1b36ce403e9ead26407987a8f3a50ff2ed77c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 28 Nov 2022 05:31:10 +0000 Subject: [PATCH] lei_mirror: simplify _get_txt_start callers We can avoid needless select()-based sleeps by always using TMPDIR for temporary files, and just slurping the small config or description file. This will make it easier to reuse the description from the manifest in the next commit. --- lib/PublicInbox/LeiMirror.pm | 87 +++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 0696c2d9..da8987be 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -106,45 +106,50 @@ sub ft_rename ($$$) { } sub _get_txt_start { # non-fatal - my ($self, $endpoint, $file, $mode) = @_; + my ($self, $endpoint, $fini) = @_; my $uri = URI->new($self->{cur_src} // $self->{src}); my $lei = $self->{lei}; my $path = $uri->path; chop($path) eq '/' or die "BUG: $uri not canonicalized"; $uri->path("$path/$endpoint"); - my $dst = $self->{cur_dst} // $self->{dst}; - my $ft = File::Temp->new(TEMPLATE => "$file-XXXX", DIR => $dst); + my $f = (split(m!/!, $endpoint))[-1]; + my $ft = File::Temp->new(TEMPLATE => "$f-XXXX", TMPDIR => 1); my $opt = { 0 => $lei->{0}, 1 => $lei->{1}, 2 => $lei->{2} }; - my $cmd = $self->{curl}->for_uri($lei, $uri, - qw(--compressed -R -o), $ft->filename); - $self->{-get_txt} = [ $ft, $cmd, $uri, $file, $mode ]; + my $cmd = $self->{curl}->for_uri($lei, $uri, qw(--compressed -R -o), + $ft->filename); + $self->{"-get_txt.$endpoint"} = [ $ft, $cmd, $uri ]; $lei->qerr("# @$cmd"); - spawn($cmd, undef, $opt); + my $jobs = $lei->{opt}->{jobs} // 1; + reap_live() while keys(%LIVE) >= $jobs; + $LIVE{spawn($cmd, undef, $opt)} = + [ \&_get_txt_done, $self, $endpoint, $fini ]; } sub _get_txt_done { # returns true on error (non-fatal), undef on success - my ($self) = @_; - my ($ft, $cmd, $uri, $file, $mode) = @{delete $self->{-get_txt}}; + my ($self, $endpoint) = @_; + my ($fh, $cmd, $uri) = @{delete $self->{"-get_txt.$endpoint"}}; my $cerr = $?; - $? = 0; + $? = 0; # don't influence normal lei exit return warn("$uri missing\n") if ($cerr >> 8) == 22; return warn("# @$cmd failed (non-fatal)\n") if $cerr; - my $dst = $self->{cur_dst} // $self->{dst}; - ft_rename($ft, "$dst/$file", $mode); + seek($fh, SEEK_SET, 0) or die "seek: $!"; + $self->{"mtime.$endpoint"} = (stat($fh))[9]; + local $/; + $self->{"txt.$endpoint"} = <$fh>; undef; # success } -# tries the relatively new /$INBOX/_/text/config/raw endpoint -sub _try_config_start { +sub _write_inbox_config { my ($self) = @_; - _get_txt_start($self, qw(_/text/config/raw inbox.config.example), 0444); -} - -sub _try_config_done { - my ($self) = @_; - _get_txt_done($self) and return; + my $buf = delete($self->{'txt._/text/config/raw'}) // return; my $dst = $self->{cur_dst} // $self->{dst}; my $f = "$dst/inbox.config.example"; + open my $fh, '>', $f or die "open($f): $!"; + print $fh $buf or die "print: $!"; + chmod(0444 & ~umask, $fh) or die "chmod($f): $!"; + my $mtime = delete $self->{'mtime._/text/config/raw'} // die 'BUG: no mtime'; + $fh->flush or die "flush($f): $!"; + utime($mtime, $mtime, $fh) or die "utime($f): $!"; my $cfg = PublicInbox::Config->git_config_dump($f, $self->{lei}->{2}); my $ibx = $self->{ibx} = {}; for my $sec (grep(/\Apublicinbox\./, @{$cfg->{-section_order}})) { @@ -160,15 +165,18 @@ sub set_description ($) { my $f = "$dst/description"; open my $fh, '+>>', $f or die "open($f): $!"; seek($fh, 0, SEEK_SET) or die "seek($f): $!"; - chomp(my $d = do { local $/; <$fh> } // die "read($f): $!"); - if ($d eq '($INBOX_DIR/description missing)' || - $d =~ /^Unnamed repository/ || $d !~ /\S/) { - seek($fh, 0, SEEK_SET) or die "seek($f): $!"; - truncate($fh, 0) or die "truncate($f): $!"; - my $src = $self->{cur_src} // $self->{src}; - print $fh "mirror of $src\n" or die "print($f): $!"; - close $fh or die "close($f): $!"; + my $d = do { local $/; <$fh> } // die "read($f): $!"; + my $orig = $d; + while (defined($d) && ($d =~ m!^\(\$INBOX_DIR/description missing\)! || + $d =~ /^Unnamed repository/ || $d !~ /\S/)) { + $d = delete($self->{'txt.description'}); } + $d //= 'mirror of '.($self->{cur_src} // $self->{src})."\n"; + return if $d eq $orig; + seek($fh, 0, SEEK_SET) or die "seek($f): $!"; + truncate($fh, 0) or die "truncate($f): $!"; + print $fh $d or die "print($f): $!"; + close $fh or die "close($f): $!"; } sub index_cloned_inbox { @@ -229,12 +237,8 @@ sub clone_v1 { $LIVE{spawn($cmd, undef, $opt)} = [ \&reap_clone, $lei, $cmd, $fini ]; reap_live() while keys(%LIVE) >= $jobs; - # wait for `git clone' to mkdir $dst (TODO: inotify/kevent?) - select(undef, undef, undef, 0.011) until -d $dst; - $LIVE{_try_config_start($self)} = [ \&_try_config_done, $self, $fini ]; - reap_live() while keys(%LIVE) >= $jobs; - $LIVE{_get_txt_start($self, qw(description description), 0666)} = - [ \&_get_txt_done, $self, $fini ]; + _get_txt_start($self, '_/text/config/raw', $fini); + _get_txt_start($self, 'description', $fini); reap_live() until ($nohang || !keys(%LIVE)); } @@ -310,13 +314,14 @@ sub reap_clone { # async, called via SIGCHLD sub v1_done { # called via OnDestroy my ($self) = @_; - my $dst = $self->{cur_dst} // $self->{dst}; - write_makefile($dst, 1); + _write_inbox_config($self); + write_makefile($self->{cur_dst} // $self->{dst}, 1); index_cloned_inbox($self, 1); } sub v2_done { # called via OnDestroy my ($self) = @_; + _write_inbox_config($self); require PublicInbox::MultiGit; my $dst = $self->{cur_dst} // $self->{dst}; my $mg = PublicInbox::MultiGit->new($dst, 'all.git', 'git'); @@ -378,13 +383,13 @@ failed to extract epoch number from $src } my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock'; my $fini = PublicInbox::OnDestroy->new($$, \&v2_done, $task); - my $jobs = $self->{lei}->{opt}->{jobs} // 1; - $LIVE{_try_config_start($task)} = [ \&_try_config_done, $task, $fini ]; - reap_live() while keys(%LIVE) >= $jobs; - $LIVE{_get_txt_start($self, qw(description description), 0666)} = - [ \&_get_txt_done, $self, $fini ]; + + _get_txt_start($task, '_/text/config/raw', $fini); + _get_txt_start($self, 'description', $fini); + $task->{-locked} = $lk->lock_for_scope($$); my @cmd = clone_cmd($lei, my $opt = {}); + my $jobs = $self->{lei}->{opt}->{jobs} // 1; do { reap_live() while keys(%LIVE) >= $jobs; while (keys(%LIVE) < $jobs && @src_edst && -- 2.44.0