X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FGit.pm;h=3c577ab317b29339271f468f3538345ed14690ca;hb=63d7b8ceee55a34cde983e8548d5ce61050d2891;hp=d53427d770f19acc4360851c65243337e6767fb7;hpb=f68c45d33656a4602c2d8b8c1a8be813f8a9a70d;p=public-inbox.git diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index d53427d7..3c577ab3 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2020 all contributors +# Copyright (C) 2014-2021 all contributors # License: GPLv2 or later # # Used to read files from a git repository without excessive forking. @@ -12,23 +12,22 @@ use v5.10.1; use parent qw(Exporter); use POSIX (); use IO::Handle; # ->autoflush -use Errno qw(EINTR EAGAIN); +use Errno qw(EINTR EAGAIN ENOENT); use File::Glob qw(bsd_glob GLOB_NOSORT); use File::Spec (); use Time::HiRes qw(stat); -use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Spawn qw(popen_rd spawn); use PublicInbox::Tmpfile; use IO::Poll qw(POLLIN); use Carp qw(croak); use Digest::SHA (); +use PublicInbox::DS qw(dwaitpid); our @EXPORT_OK = qw(git_unquote git_quote); our $PIPE_BUFSIZ = 65536; # Linux default our $in_cleanup; our $RDTIMEO = 60_000; # milliseconds -use constant MAX_INFLIGHT => - (($^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF()) * 3) - / +use constant MAX_INFLIGHT => (POSIX::PIPE_BUF * 3) / 65; # SHA-256 hex size + "\n" in preparation for git using non-SHA1 my %GIT_ESC = ( @@ -49,14 +48,13 @@ my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC; sub git_unquote ($) { return $_[0] unless ($_[0] =~ /\A"(.*)"\z/); $_[0] = $1; - $_[0] =~ s/\\([\\"abfnrtv])/$GIT_ESC{$1}/g; - $_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge; + $_[0] =~ s!\\([\\"abfnrtv]|[0-3][0-7]{2})!$GIT_ESC{$1}//chr(oct($1))!ge; $_[0]; } sub git_quote ($) { if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/ - '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) { + '\\'.($ESC_GIT{$1}||sprintf("%03o",ord($1)))/egs) { return qq{"$_[0]"}; } $_[0]; @@ -96,9 +94,9 @@ sub alternates_changed { sub last_check_err { my ($self) = @_; my $fh = $self->{err_c} or return; - sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!"); + sysseek($fh, 0, 0) or $self->fail("sysseek failed: $!"); defined(sysread($fh, my $buf, -s $fh)) or - fail($self, "sysread failed: $!"); + $self->fail("sysread failed: $!"); $buf; } @@ -107,24 +105,25 @@ sub _bidi_pipe { if ($self->{$pid}) { if (defined $err) { # "err_c" my $fh = $self->{$err}; - sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!"); - truncate($fh, 0) or fail($self, "truncate failed: $!"); + sysseek($fh, 0, 0) or $self->fail("sysseek failed: $!"); + truncate($fh, 0) or $self->fail("truncate failed: $!"); } return; } my ($out_r, $out_w); - pipe($out_r, $out_w) or fail($self, "pipe failed: $!"); + pipe($out_r, $out_w) or $self->fail("pipe failed: $!"); my @cmd = (qw(git), "--git-dir=$self->{git_dir}", qw(-c core.abbrev=40 cat-file), $batch); my $redir = { 0 => $out_r }; if ($err) { my $id = "git.$self->{git_dir}$batch.err"; - my $fh = tmpfile($id) or fail($self, "tmpfile($id): $!"); + my $fh = tmpfile($id) or $self->fail("tmpfile($id): $!"); $self->{$err} = $fh; $redir->{2} = $fh; } my ($in_r, $p) = popen_rd(\@cmd, undef, $redir); $self->{$pid} = $p; + $self->{"$pid.owner"} = $$; $out_w->autoflush(1); if ($^O eq 'linux') { # 1031: F_SETPIPE_SZ fcntl($out_w, 1031, 4096); @@ -187,7 +186,7 @@ sub cat_async_retry ($$$$$) { for (my $i = 0; $i < @$inflight; $i += 3) { $buf .= "$inflight->[$i]\n"; } - print { $self->{out} } $buf or fail($self, "write error: $!"); + print { $self->{out} } $buf or $self->fail("write error: $!"); unshift(@$inflight, \$req, $cb, $arg); # \$ref to indicate retried cat_async_step($self, $inflight); # take one step @@ -238,17 +237,16 @@ sub batch_prepare ($) { } sub _cat_file_cb { - my ($bref, undef, undef, $size, $result) = @_; - @$result = ($bref, $size); + my ($bref, $oid, $type, $size, $result) = @_; + @$result = ($bref, $oid, $type, $size); } sub cat_file { - my ($self, $oid, $sizeref) = @_; + my ($self, $oid) = @_; my $result = []; cat_async($self, $oid, \&_cat_file_cb, $result); cat_async_wait($self); - $$sizeref = $result->[1] if $sizeref; - $result->[0]; + wantarray ? @$result : $result->[0]; } sub check_async_step ($$) { @@ -265,7 +263,7 @@ sub check_async_step ($$) { # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/ if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') { my $ret = my_read($self->{in_c}, $rbuf, $type + 1); - fail($self, defined($ret) ? 'read EOF' : "read: $!") if !$ret; + $self->fail(defined($ret) ? 'read EOF' : "read: $!") if !$ret; } $self->{chk_rbuf} = $rbuf if $$rbuf ne ''; eval { $cb->($hex, $type, $size, $arg, $self) }; @@ -294,7 +292,7 @@ sub check_async ($$$$) { while (scalar(@$inflight_c) >= MAX_INFLIGHT) { check_async_step($self, $inflight_c); } - print { $self->{out_c} } $oid, "\n" or fail($self, "write error: $!"); + print { $self->{out_c} } $oid, "\n" or $self->fail("write error: $!"); push(@$inflight_c, $oid, $cb, $arg); } @@ -326,10 +324,7 @@ sub _destroy { # GitAsyncCat::event_step may delete {pid} my $p = delete $self->{$pid} or return; - - # PublicInbox::DS may not be loaded - eval { PublicInbox::DS::dwaitpid($p, undef, undef) }; - waitpid($p, 0) if $@; # wait synchronously if not in event loop + dwaitpid($p) if $$ == $self->{"$pid.owner"}; } sub cat_async_abort ($) { @@ -347,25 +342,41 @@ sub cat_async_abort ($) { cleanup($self); } -sub fail { +sub fail { # may be augmented in subclasses my ($self, $msg) = @_; cat_async_abort($self); croak(ref($self) . ' ' . ($self->{git_dir} // '') . ": $msg"); } +# $git->popen(qw(show f00)); # or +# $git->popen(qw(show f00), { GIT_CONFIG => ... }, { 2 => ... }); sub popen { - my ($self, @cmd) = @_; - @cmd = ('git', "--git-dir=$self->{git_dir}", @cmd); - popen_rd(\@cmd); + my ($self, $cmd) = splice(@_, 0, 2); + $cmd = [ 'git', "--git-dir=$self->{git_dir}", + ref($cmd) ? @$cmd : ($cmd, grep { defined && !ref } @_) ]; + popen_rd($cmd, grep { !defined || ref } @_); # env and opt } +# same args as popen above sub qx { - my ($self, @cmd) = @_; - my $fh = $self->popen(@cmd); - local $/ = "\n"; - return <$fh> if wantarray; - local $/; - <$fh> + my $fh = popen(@_); + if (wantarray) { + my @ret = <$fh>; + close $fh; # caller should check $? + @ret; + } else { + local $/; + my $ret = <$fh>; + close $fh; # caller should check $? + $ret; + } +} + +sub date_parse { + my $self = shift; + map { + substr($_, length('--max-age='), -1) + } $self->qx('rev-parse', map { "--since=$_" } @_); } # check_async and cat_async may trigger the other, so ensure they're @@ -389,7 +400,7 @@ sub cleanup { delete $self->{inflight_c}; _destroy($self, qw(cat_rbuf in out pid)); _destroy($self, qw(chk_rbuf in_c out_c pid_c err_c)); - !!($self->{pid} || $self->{pid_c}); + defined($self->{pid}) || defined($self->{pid_c}); } @@ -412,8 +423,8 @@ sub local_nick ($) { my ($self) = @_; my $ret = '???'; # don't show full FS path, basename should be OK: - if ($self->{git_dir} =~ m!/([^/]+)(?:/\.git)?\z!) { - $ret = "/path/to/$1"; + if ($self->{git_dir} =~ m!/([^/]+)(?:/*\.git/*)?\z!) { + $ret = "$1.git"; } wantarray ? ($ret) : $ret; } @@ -449,46 +460,17 @@ sub cat_async ($$$;$) { while (scalar(@$inflight) >= MAX_INFLIGHT) { cat_async_step($self, $inflight); } - print { $self->{out} } $oid, "\n" or fail($self, "write error: $!"); + print { $self->{out} } $oid, "\n" or $self->fail("write error: $!"); push(@$inflight, $oid, $cb, $arg); } -sub async_prefetch { - my ($self, $oid, $cb, $arg) = @_; - if (my $inflight = $self->{inflight}) { - # we could use MAX_INFLIGHT here w/o the halving, - # but lets not allow one client to monopolize a git process - if (scalar(@$inflight) < int(MAX_INFLIGHT/2)) { - print { $self->{out} } $oid, "\n" or - fail($self, "write error: $!"); - return push(@$inflight, $oid, $cb, $arg); - } - } - undef; -} - -sub extract_cmt_time { - my ($bref, undef, undef, undef, $modified) = @_; - - if ($$bref =~ /^committer .*?> ([0-9]+) [\+\-]?[0-9]+/sm) { - my $cmt_time = $1 + 0; - $$modified = $cmt_time if $cmt_time > $$modified; - } -} - # returns the modified time of a git repo, same as the "modified" field # of a grokmirror manifest sub modified ($) { - my ($self) = @_; - my $modified = 0; - my $fh = popen($self, qw(rev-parse --branches)); - local $/ = "\n"; - while (my $oid = <$fh>) { - chomp $oid; - cat_async($self, $oid, \&extract_cmt_time, \$modified); - } - cat_async_wait($self); - $modified || time; + # committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead + my $fh = popen($_[0], qw[for-each-ref --sort=-committerdate + --format=%(committerdate:raw) --count=1]); + (split(/ /, <$fh> // time))[0] + 0; # integerize for JSON } # for grokmirror, which doesn't read gitweb.description @@ -497,14 +479,13 @@ sub modified ($) { # templates/this--description in git.git sub manifest_entry { my ($self, $epoch, $default_desc) = @_; - my ($fh, $pid) = $self->popen('show-ref'); + my $fh = $self->popen('show-ref'); my $dig = Digest::SHA->new(1); while (read($fh, my $buf, 65536)) { $dig->add($buf); } - close $fh; - waitpid($pid, 0); - return if $?; # empty, uninitialized git repo + close $fh or return; # empty, uninitialized git repo + undef $fh; # for open, below my $git_dir = $self->{git_dir}; my $ent = { fingerprint => $dig->hexdigest, @@ -542,6 +523,27 @@ sub manifest_entry { $ent; } +# returns true if there are pending cat-file processes +sub cleanup_if_unlinked { + my ($self) = @_; + return cleanup($self) if $^O ne 'linux'; + # Linux-specific /proc/$PID/maps access + # TODO: support this inside git.git + my $ret = 0; + for my $fld (qw(pid pid_c)) { + my $pid = $self->{$fld} // next; + open my $fh, '<', "/proc/$pid/maps" or return cleanup($self); + while (<$fh>) { + # n.b. we do not restart for unlinked multi-pack-index + # since it's not too huge, and the startup cost may + # be higher. + return cleanup($self) if /\.(?:idx|pack) \(deleted\)$/; + } + ++$ret; + } + $ret; +} + 1; __END__ =pod