X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FGcf2.pm;h=41ee0715c8560feaf47854488b19fccdb76da13b;hb=23af251dd607c4e75ab1e68063f2c885c48cc035;hp=fe76b1fdaaf2a61881d08c51718ba4f680d0a98e;hpb=3750b2e2952e55fe4a04c73fc78f25c5e07d0525;p=public-inbox.git diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm index fe76b1fd..41ee0715 100644 --- a/lib/PublicInbox/Gcf2.pm +++ b/lib/PublicInbox/Gcf2.pm @@ -1,57 +1,152 @@ -# Copyright (C) 2020 all contributors +# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ -# backend for public-inbox-gcf2(1) (git-cat-file based on libgit2, -# other libgit2 stuff may go here, too) +# backend for a git-cat-file-workalike based on libgit2, +# other libgit2 stuff may go here, too. package PublicInbox::Gcf2; use strict; -use PublicInbox::Spawn qw(which popen_rd); -use Fcntl qw(LOCK_EX); -my (%CFG, $c_src, $lockfh); +use v5.10.1; +use PublicInbox::Spawn qw(which popen_rd); # may set PERL_INLINE_DIRECTORY +use Fcntl qw(LOCK_EX SEEK_SET); +use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); +use IO::Handle; # autoflush BEGIN { + my (%CFG, $c_src); # PublicInbox::Spawn will set PERL_INLINE_DIRECTORY # to ~/.cache/public-inbox/inline-c if it exists my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} // die 'PERL_INLINE_DIRECTORY not defined'; my $f = "$inline_dir/.public-inbox.lock"; - open $lockfh, '>', $f or die "failed to open $f: $!\n"; - my $pc = which($ENV{PKG_CONFIG} // 'pkg-config'); + open my $fh, '+>', $f or die "open($f): $!"; + + # CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking + flock($fh, LOCK_EX) or die "LOCK_EX($f): $!\n"; + + my $pc = which($ENV{PKG_CONFIG} // 'pkg-config') // + die "pkg-config missing for libgit2"; my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!); - my $rdr = {}; - open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!"; + my $ef = "$inline_dir/.public-inbox.pkg-config.err"; + open my $err, '+>', $ef or die "open($ef): $!"; for my $x (qw(libgit2)) { - my $l = popen_rd([$pc, '--libs', $x], undef, $rdr); + my $rdr = { 2 => $err }; + my ($l, $pid) = popen_rd([$pc, '--libs', $x], undef, $rdr); $l = do { local $/; <$l> }; + waitpid($pid, 0); next if $?; - my $c = popen_rd([$pc, '--cflags', $x], undef, $rdr); + (my $c, $pid) = popen_rd([$pc, '--cflags', $x], undef, $rdr); $c = do { local $/; <$c> }; + waitpid($pid, 0); next if $?; # note: we name C source files .h to prevent # ExtUtils::MakeMaker from automatically trying to # build them. my $f = "$dir/gcf2_$x.h"; - if (open(my $fh, '<', $f)) { - chomp($l, $c); - local $/; - $c_src = <$fh>; - $CFG{LIBS} = $l; - $CFG{CCFLAGSEX} = $c; - last; - } else { - die "E: $f: $!\n"; - } + open(my $src, '<', $f) or die "E: open($f): $!"; + chomp($l, $c); + local $/; + defined($c_src = <$src>) or die "read $f: $!"; + $CFG{LIBS} = $l; + $CFG{CCFLAGSEX} = $c; + last; + } + unless ($c_src) { + seek($err, 0, SEEK_SET); + $err = do { local $/; <$err> }; + die "E: libgit2 not installed: $err\n"; } - die "E: libgit2 not installed\n" unless $c_src; + open my $oldout, '>&', \*STDOUT or die "dup(1): $!"; + open my $olderr, '>&', \*STDERR or die "dup(2): $!"; + open STDOUT, '>&', $fh or die "1>$f: $!"; + open STDERR, '>&', $fh or die "2>$f: $!"; + STDERR->autoflush(1); + STDOUT->autoflush(1); - # CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking - flock($lockfh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n"; + # we use Capitalized and ALLCAPS for compatibility with old Inline::C + eval <<'EOM'; +use Inline C => Config => %CFG, BOOT => q[git_libgit2_init();]; +use Inline C => $c_src, BUILD_NOISY => 1; +EOM + $err = $@; + open(STDERR, '>&', $olderr) or warn "restore stderr: $!"; + open(STDOUT, '>&', $oldout) or warn "restore stdout: $!"; + if ($err) { + seek($fh, 0, SEEK_SET); + my @msg = <$fh>; + die "Inline::C Gcf2 build failed:\n", $err, "\n", @msg; + } +} + +sub add_alt ($$) { + my ($gcf2, $objdir) = @_; + + # libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1 + # in Debian) doesn't handle relative epochs properly when nested + # multiple levels. Add all the absolute paths to workaround it, + # since $EXTINDEX_DIR/ALL.git/objects/info/alternates uses absolute + # paths to reference $V2INBOX_DIR/all.git/objects and + # $V2INBOX_DIR/all.git/objects/info/alternates uses relative paths + # to refer to $V2INBOX_DIR/git/$EPOCH.git/objects + # + # See https://bugs.debian.org/975607 + if (open(my $fh, '<', "$objdir/info/alternates")) { + chomp(my @abs_alt = grep(m!^/!, <$fh>)); + $gcf2->add_alternate($_) for @abs_alt; + } + $gcf2->add_alternate($objdir); + 1; +} + +sub have_unlinked_files () { + # FIXME: port gcf2-like over to git.git so we won't need to + # deal with libgit2 + return 1 if $^O ne 'linux'; + open my $fh, '<', "/proc/$$/maps" or return; + while (<$fh>) { return 1 if /\.(?:idx|pack) \(deleted\)$/ } + undef; +} + +# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop [EXPIRE-TIMEOUT] +# (see lib/PublicInbox/Gcf2Client.pm) +sub loop (;$) { + my $exp = $_[0] || $ARGV[0] || 60; # seconds + my $gcf2 = new(); + my (%seen, $check_at); + STDERR->autoflush(1); + STDOUT->autoflush(1); + + while () { + chomp; + my ($oid, $git_dir) = split(/ /, $_, 2); + $seen{$git_dir} //= add_alt($gcf2, "$git_dir/objects"); + if (!$gcf2->cat_oid(1, $oid)) { + # retry once if missing. We only get unabbreviated OIDs + # from SQLite or Xapian DBs, here, so malicious clients + # can't trigger excessive retries: + warn "I: $$ $oid missing, retrying in $git_dir\n"; + + $gcf2 = new(); + %seen = ($git_dir => add_alt($gcf2,"$git_dir/objects")); + $check_at = clock_gettime(CLOCK_MONOTONIC) + $exp; + + if ($gcf2->cat_oid(1, $oid)) { + warn "I: $$ $oid found after retry\n"; + } else { + warn "W: $$ $oid missing after retry\n"; + print "$oid missing\n"; # mimic git-cat-file + } + } else { # check expiry to deal with deleted pack files + my $now = clock_gettime(CLOCK_MONOTONIC); + $check_at //= $now + $exp; + if ($now > $check_at) { + undef $check_at; + if (have_unlinked_files()) { + $gcf2 = new(); + %seen = (); + } + } + } + } } -# we use Capitalized and ALLCAPS for compatibility with old Inline::C -use Inline C => Config => %CFG, BOOT => 'git_libgit2_init();'; -use Inline C => $c_src; -undef $c_src; -undef %CFG; -undef $lockfh; 1;