X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FGcf2.pm;h=041dffe7d9c7b26396ab0a97f7b7deaf008c6896;hb=0d38f65c490466837ae091afa7a7b6f59d04ce7c;hp=fe76b1fdaaf2a61881d08c51718ba4f680d0a98e;hpb=3750b2e2952e55fe4a04c73fc78f25c5e07d0525;p=public-inbox.git

Reviewer notes (free text ahead of the first "diff --git" line; patch(1) and
git-apply(1) skip it):

  * Hunk 1 rewords the module description and loads IO::Handle, which
    loop() needs for its ->autoflush calls on STDERR and STDOUT.
  * Hunk 2 adds add_alt(), which registers an object directory together
    with every absolute path found in its info/alternates file, and
    loop(), which reads "$oid $git_dir" request lines from STDIN, retries
    a missed OID once with a fresh handle, and prints "$oid missing" if
    the retry also fails.
  * The loop condition is "while (<STDIN>)".  A bare "while ()" never
    reads input and never terminates.
  * NOTE(review): the Copyright and License context lines at the top of
    hunk 1 look like they lost trailing <...> text during extraction;
    confirm them against the target file before applying.

diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index fe76b1fd..041dffe7 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -1,12 +1,13 @@
 # Copyright (C) 2020 all contributors
 # License: AGPL-3.0+
 
-# backend for public-inbox-gcf2(1) (git-cat-file based on libgit2,
-# other libgit2 stuff may go here, too)
+# backend for a git-cat-file-workalike based on libgit2,
+# other libgit2 stuff may go here, too.
 package PublicInbox::Gcf2;
 use strict;
 use PublicInbox::Spawn qw(which popen_rd);
 use Fcntl qw(LOCK_EX);
+use IO::Handle; # autoflush
 my (%CFG, $c_src, $lockfh);
 BEGIN {
 	# PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
@@ -54,4 +55,56 @@ use Inline C => $c_src;
 undef $c_src;
 undef %CFG;
 undef $lockfh;
+
+sub add_alt ($$) {
+	my ($gcf2, $objdir) = @_;
+
+	# libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1
+	# in Debian) doesn't handle relative epochs properly when nested
+	# multiple levels. Add all the absolute paths to workaround it,
+	# since $EXTINDEX_DIR/ALL.git/objects/info/alternates uses absolute
+	# paths to reference $V2INBOX_DIR/all.git/objects and
+	# $V2INBOX_DIR/all.git/objects/info/alternates uses relative paths
+	# to refer to $V2INBOX_DIR/git/$EPOCH.git/objects
+	#
+	# See https://bugs.debian.org/975607
+	if (open(my $fh, '<', "$objdir/info/alternates")) {
+		chomp(my @abs_alt = grep(m!^/!, <$fh>));
+		$gcf2->add_alternate($_) for @abs_alt;
+	}
+	$gcf2->add_alternate($objdir);
+}
+
+# Usage: $^X -MPublicInbox::Gcf2 -e 'PublicInbox::Gcf2::loop()'
+# (see lib/PublicInbox/Gcf2Client.pm)
+sub loop {
+	my $gcf2 = new();
+	my %seen;
+	STDERR->autoflush(1);
+	STDOUT->autoflush(1);
+
+	while (<STDIN>) {
+		chomp;
+		my ($oid, $git_dir) = split(/ /, $_, 2);
+		$seen{$git_dir}++ or add_alt($gcf2, "$git_dir/objects");
+		if (!$gcf2->cat_oid(1, $oid)) {
+			# retry once if missing. We only get unabbreviated OIDs
+			# from SQLite or Xapian DBs, here, so malicious clients
+			# can't trigger excessive retries:
+			warn "I: $$ $oid missing, retrying in $git_dir\n";
+
+			$gcf2 = new();
+			%seen = ($git_dir => 1);
+			add_alt($gcf2, "$git_dir/objects");
+
+			if ($gcf2->cat_oid(1, $oid)) {
+				warn "I: $$ $oid found after retry\n";
+			} else {
+				warn "W: $$ $oid missing after retry\n";
+				print "$oid missing\n"; # mimic git-cat-file
+			}
+		}
+	}
+}
+
 1;