X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FGitAsyncCat.pm;h=cea3f539234ab2cb3864865c2798c5e073a4ba8d;hb=23af251dd607c4e75ab1e68063f2c885c48cc035;hp=b9dbe0ccf2251a0d434d6260880dc0d240bb431b;hpb=d78f50649a5545d66a61b5465ca7f5ce4be398ea;p=public-inbox.git diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm index b9dbe0cc..cea3f539 100644 --- a/lib/PublicInbox/GitAsyncCat.pm +++ b/lib/PublicInbox/GitAsyncCat.pm @@ -1,80 +1,82 @@ -# Copyright (C) 2020 all contributors +# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # # internal class used by PublicInbox::Git + PublicInbox::DS # This parses the output pipe of "git cat-file --batch" -# -# Note: this does NOT set the non-blocking flag, we expect `git cat-file' -# to be a local process, and git won't start writing a blob until it's -# fully read. So minimize context switching and read as much as possible -# and avoid holding a buffer in our heap any longer than it has to live. package PublicInbox::GitAsyncCat; use strict; use parent qw(PublicInbox::DS Exporter); use POSIX qw(WNOHANG); use PublicInbox::Syscall qw(EPOLLIN EPOLLET); -our @EXPORT = qw(git_async_cat git_async_prefetch); +our @EXPORT = qw(ibx_async_cat ibx_async_prefetch); use PublicInbox::Git (); our $GCF2C; # singleton PublicInbox::Gcf2Client sub close { my ($self) = @_; - - if (my $gitish = delete $self->{gitish}) { - PublicInbox::Git::cat_async_abort($gitish); + if (my $git = delete $self->{git}) { + $git->async_abort; } $self->SUPER::close; # PublicInbox::DS::close } sub event_step { my ($self) = @_; - my $gitish = $self->{gitish} or return; - return $self->close if ($gitish->{in} // 0) != ($self->{sock} // 1); - my $inflight = $gitish->{inflight}; + my $git = $self->{git} or return; + return $self->close if ($git->{in} // 0) != ($self->{sock} // 1); + my $inflight = $git->{inflight}; if ($inflight && @$inflight) { - $gitish->cat_async_step($inflight); + $git->cat_async_step($inflight); # child death? - if (($gitish->{in} // 0) != ($self->{sock} // 1)) { + if (($git->{in} // 0) != ($self->{sock} // 1)) { $self->close; - } elsif (@$inflight || exists $gitish->{cat_rbuf}) { + } elsif (@$inflight || exists $git->{rbuf}) { # ok, more to do, requeue for fairness $self->requeue; } - } elsif ((my $pid = waitpid($gitish->{pid}, WNOHANG)) > 0) { + } elsif ((my $pid = waitpid($git->{pid}, WNOHANG)) > 0) { # May happen if the child process is killed by a BOFH # (or segfaults) - delete $gitish->{pid}; - warn "E: gitish $pid exited with \$?=$?\n"; + delete $git->{pid}; + warn "E: git $pid exited with \$?=$?\n"; $self->close; } } -sub git_async_cat ($$$$) { - my ($git, $oid, $cb, $arg) = @_; - my $gitish = $GCF2C; - if ($gitish) { - $oid .= " $git->{git_dir}"; - } else { - $gitish = $git; +sub ibx_async_cat ($$$$) { + my ($ibx, $oid, $cb, $arg) = @_; + my $git = $ibx->git; + # {topdir} means ExtSearch (likely [extindex "all"]) with potentially + # 100K alternates. git(1) has a proposed patch for 100K alternates: + # + if (!defined($ibx->{topdir}) && ($GCF2C //= eval { + require PublicInbox::Gcf2Client; + PublicInbox::Gcf2Client::new(); + } // 0)) { # 0: do not retry if libgit2 or Inline::C are missing + $GCF2C->gcf2_async(\"$oid $git->{git_dir}\n", $cb, $arg); + \undef; + } else { # read-only end of git-cat-file pipe + $git->cat_async($oid, $cb, $arg); + $git->{async_cat} //= do { + my $self = bless { git => $git }, __PACKAGE__; + $git->{in}->blocking(0); + $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET); + \undef; # this is a true ref() + }; } - $gitish->cat_async($oid, $cb, $arg); - $gitish->{async_cat} //= do { - my $self = bless { gitish => $gitish }, __PACKAGE__; - $self->SUPER::new($gitish->{in}, EPOLLIN|EPOLLET); - \undef; # this is a true ref() - }; } # this is safe to call inside $cb, but not guaranteed to enqueue # returns true if successful, undef if not. -sub git_async_prefetch { - my ($git, $oid, $cb, $arg) = @_; - if ($GCF2C) { - if ($GCF2C->{async_cat} && !$GCF2C->{wbuf}) { - $oid .= " $git->{git_dir}"; - return $GCF2C->cat_async($oid, $cb, $arg); +sub ibx_async_prefetch { + my ($ibx, $oid, $cb, $arg) = @_; + my $git = $ibx->git; + if (!defined($ibx->{topdir}) && $GCF2C) { + if (!$GCF2C->{wbuf}) { + $oid .= " $git->{git_dir}\n"; + return $GCF2C->gcf2_async(\$oid, $cb, $arg); # true } } elsif ($git->{async_cat} && (my $inflight = $git->{inflight})) { # we could use MAX_INFLIGHT here w/o the halving,