X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FFetch.pm;h=5261cad198552883ce162ea0fa3b84e908939da1;hb=23af251dd607c4e75ab1e68063f2c885c48cc035;hp=d795731ccb62eed5d9d2fe8a43c8ae792f56bd9b;hpb=b45a1dffa647f6427d0c900fcc55753db7a1994c;p=public-inbox.git diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index d795731c..5261cad1 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -6,19 +6,20 @@ use strict; use v5.10.1; use parent qw(PublicInbox::IPC); use URI (); -use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Spawn qw(popen_rd run_die spawn); use PublicInbox::Admin; use PublicInbox::LEI; use PublicInbox::LeiCurl; use PublicInbox::LeiMirror; -use IO::Uncompress::Gunzip qw(gunzip $GunzipError); use File::Temp (); +use PublicInbox::Config; +use IO::Compress::Gzip qw(gzip $GzipError); sub new { bless {}, __PACKAGE__ } -sub fetch_cmd ($$) { +sub fetch_args ($$) { my ($lei, $opt) = @_; - my @cmd = qw(git); + my @cmd; # (git --git-dir=...) to be added by caller $opt->{$_} = $lei->{$_} for (0..2); # we support "-c $key=$val" for arbitrary git config options # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 @@ -30,78 +31,50 @@ sub fetch_cmd ($$) { } sub remote_url ($$) { - my ($lei, $dir) = @_; # TODO: support non-"origin"? - my $cmd = [ qw(git config remote.origin.url) ]; - my $fh = popen_rd($cmd, undef, { -C => $dir, 2 => $lei->{2} }); - my $url = <$fh>; - close $fh or return; - chomp $url; - $url; + my ($lei, $dir) = @_; + my $rn = $lei->{opt}->{'try-remote'} // [ 'origin', '_grokmirror' ]; + for my $r (@$rn) { + my $cmd = [ qw(git config), "remote.$r.url" ]; + my $fh = popen_rd($cmd, undef, { -C => $dir, 2 => $lei->{2} }); + my $url = <$fh>; + close $fh or next; + $url =~ s!/*\n!!s; + return $url; + } + undef } -sub do_fetch { - my ($cls, $lei, $cd) = @_; - my $ibx_ver; - my $curl = PublicInbox::LeiCurl->new($lei) or return; - my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver); - if ($ibx_ver == 1) { - my $url = remote_url($lei, $dir) // - die "E: $dir missing remote.origin.url\n"; - my $uri = URI->new($url); - my $torsocks = $curl->torsocks($lei, $uri); - my $opt = { -C => $dir }; - my $cmd = [ @$torsocks, fetch_cmd($lei, $opt) ]; - my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt); - $lei->child_error($cerr, "@$cmd failed") if $cerr; - return; - } - # v2: - opendir my $dh, "$dir/git" or die "opendir $dir/git: $!"; - my @epochs = sort { $b <=> $a } map { substr($_, 0, -4) + 0 } - grep(/\A[0-9]+\.git\z/, readdir($dh)); - my ($git_url, $epoch); - for my $nr (@epochs) { # try newest epoch, first - my $edir = "$dir/git/$nr.git"; - if (defined(my $url = remote_url($lei, $edir))) { - $git_url = $url; - $epoch = $nr; - last; - } else { - warn "W: $edir missing remote.origin.url\n"; - } - } - $git_url or die "Unable to determine git URL\n"; - my $inbox_url = $git_url; - $inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or - $inbox_url =~ s!/$epoch(?:\.git)?/?\z!! or die < -EOM - $lei->qerr("# inbox URL: $inbox_url/"); - my $muri = URI->new("$inbox_url/manifest.js.gz"); - my $ft = File::Temp->new(TEMPLATE => 'manifest-XXXX', - UNLINK => 1, DIR => $dir); +sub do_manifest ($$$) { + my ($lei, $dir, $ibx_uri) = @_; + my $muri = URI->new("$ibx_uri/manifest.js.gz"); + my $ft = File::Temp->new(TEMPLATE => 'm-XXXX', + UNLINK => 1, DIR => $dir, SUFFIX => '.tmp'); my $fn = $ft->filename; - my @opt = (qw(-R -o), $fn); my $mf = "$dir/manifest.js.gz"; my $m0; # current manifest.js.gz contents if (open my $fh, '<', $mf) { $m0 = eval { PublicInbox::LeiMirror::decode_manifest($fh, $mf, $mf) }; - $lei->err($@) if $@; - push @opt, '-z', $mf if defined($m0); + warn($@) if $@; } - my $curl_cmd = $curl->for_uri($lei, $muri, @opt); - my $opt = {}; + my ($bn) = ($fn =~ m!/([^/]+)\z!); + my $curl_cmd = $lei->{curl}->for_uri($lei, $muri, qw(-R -o), $bn); + my $opt = { -C => $dir }; $opt->{$_} = $lei->{$_} for (0..2); my $cerr = PublicInbox::LeiMirror::run_reap($lei, $curl_cmd, $opt); - return $lei->child_error($cerr, "@$curl_cmd failed") if $cerr; - return if !-s $ft; # 304 Not Modified via curl -z - - my $m1 = PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri); + if ($cerr) { + return [ 404, $muri ] if ($cerr >> 8) == 22; # 404 Missing + $lei->child_error($cerr, "@$curl_cmd failed"); + return; + } + my $m1 = eval { + PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri); + } or return [ 404, $muri ]; my $mdiff = { %$m1 }; - # filter out unchanged entries + # filter out unchanged entries. We check modified, too, since + # fingerprints are SHA-1, so there's a teeny chance they'll collide while (my ($k, $v0) = each %{$m0 // {}}) { my $cur = $m1->{$k} // next; my $f0 = $v0->{fingerprint} // next; @@ -110,36 +83,149 @@ EOM my $t1 = $cur->{modified} // next; delete($mdiff->{$k}) if $f0 eq $f1 && $t0 == $t1; } - my $ibx_uri = URI->new("$inbox_url/"); - my ($path_pfx, $v1_bare, @v2_epochs) = + unless (keys %$mdiff) { + $lei->child_error(127 << 8) if $lei->{opt}->{'exit-code'}; + return; + } + my (undef, $v1_path, @v2_epochs) = PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path); - defined($v1_bare) and die <, WTF? -EOM - my @epoch_nr = sort { $a <=> $b } - map { my ($nr) = (m!/([0-9]+)\.git\z!g) } @v2_epochs; + [ 200, $muri, $v1_path, \@v2_epochs, $ft, $mf, $m1 ]; +} +sub get_fingerprint2 { + my ($git_dir) = @_; + require Digest::SHA; + my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir }); + Digest::SHA::sha256(do { local $/; <$rd> }); +} + +sub writable_dir ($) { + my ($dir) = @_; + return unless -d $dir && -w _; + my @st = stat($dir); + $st[2] & 0222; # any writable bits set? (in case of root) +} + +sub do_fetch { # main entry point + my ($cls, $lei, $cd) = @_; + my $ibx_ver; + $lei->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver); + my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch, $skip); + if ($ibx_ver == 1) { + my $url = remote_url($lei, $dir) // + die "E: $dir missing remote.*.url\n"; + $ibx_uri = URI->new($url); + } else { # v2: + require PublicInbox::MultiGit; + $mg = PublicInbox::MultiGit->new($dir, 'all.git', 'git'); + @epochs = $mg->git_epochs; + my ($git_url, $epoch); + for my $nr (@epochs) { # try newest epoch, first + my $edir = "$dir/git/$nr.git"; + if (!writable_dir($edir)) { + $skip->{$nr} = 1; + next; + } + next if defined $git_url; + if (defined(my $url = remote_url($lei, $edir))) { + $git_url = $url; + $epoch = $nr; + } else { + warn "W: $edir missing remote.*.url\n"; + my $pid = spawn([qw(git config -l)], undef, + { 1 => $lei->{2}, 2 => $lei->{2} }); + waitpid($pid, 0); + $lei->child_error($?) if $?; + } + } + @epochs = grep { !$skip->{$_} } @epochs if $skip; + $skip //= {}; # makes code below easier + $git_url or die "Unable to determine git URL\n"; + my $inbox_url = $git_url; + $inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or + $inbox_url =~ s!/$epoch(?:\.git)?/?\z!! or die < +EOM + $ibx_uri = URI->new($inbox_url); + } + PublicInbox::LeiMirror::write_makefile($dir, $ibx_ver); + $lei->qerr("# inbox URL: $ibx_uri/"); + my $res = do_manifest($lei, $dir, $ibx_uri) or return; + my ($code, $muri, $v1_path, $v2_epochs, $ft, $mf, $m1) = @$res; + if ($code == 404) { + # any pre-manifest.js.gz instances running? Just fetch all + # existing ones and unconditionally try cloning the next + $v2_epochs = [ map { "$dir/git/$_.git" } @epochs ]; + if (@epochs) { + my $n = $epochs[-1] + 1; + push @$v2_epochs, "$dir/git/$n.git" if !$skip->{$n}; + } + } else { + $code == 200 or die "BUG unexpected code $code\n"; + } + my $mculled; + if ($ibx_ver == 2) { + defined($v1_path) and warn <, WTF? +EOM + @git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b } map { + my ($nr) = (m!/([0-9]+)\.git\z!g); + $skip->{$nr} ? () : $nr; + } @$v2_epochs; + if ($m1 && scalar keys %$skip) { + my $re = join('|', keys %$skip); + my @del = grep(m!/git/$re\.git\z!, keys %$m1); + delete @$m1{@del}; + $mculled = 1; + } + } else { + $git_dir[0] = $dir; + } # n.b. this expects all epochs are from the same host - my $torsocks = $curl->torsocks($lei, $muri); - for my $nr (@epoch_nr) { - my $dir = "$dir/git/$nr.git"; + my $torsocks = $lei->{curl}->torsocks($lei, $muri); + my $fp2 = $lei->{opt}->{'exit-code'} ? [] : undef; + my $xit = 127; + for my $d (@git_dir) { my $cmd; - my $opt = {}; - if (-d $dir) { - $opt->{-C} = $dir; - $cmd = [ @$torsocks, fetch_cmd($lei, $opt) ]; + my $opt = {}; # for spawn + if (-d $d) { + $fp2->[0] = get_fingerprint2($d) if $fp2; + $cmd = [ @$torsocks, 'git', "--git-dir=$d", + fetch_args($lei, $opt) ]; } else { my $e_uri = $ibx_uri->clone; - $e_uri->path($ibx_uri->path."git/$nr.git"); + my ($epath) = ($d =~ m!(/git/[0-9]+\.git)\z!); + defined($epath) or + die "BUG: $d is not an epoch to clone\n"; + $e_uri->path($ibx_uri->path.$epath); $cmd = [ @$torsocks, PublicInbox::LeiMirror::clone_cmd($lei, $opt), - $$e_uri, $dir ]; + $$e_uri, $d]; + push @new_epoch, substr($epath, 5, -4) + 0; + $xit = 0; } my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt); - return $lei->child_error($cerr, "@$cmd failed") if $cerr; + # do not bail on clone failure if we didn't have a manifest + if ($cerr && ($code == 200 || -d $d)) { + $lei->child_error($cerr, "@$cmd failed"); + return; + } + if ($fp2 && $xit) { + $fp2->[1] = get_fingerprint2($d); + $xit = 0 if $fp2->[0] ne $fp2->[1]; + } + } + for my $i (@new_epoch) { $mg->epoch_cfg_set($i) } + if ($ft) { + if ($mculled) { + my $json = PublicInbox::Config->json->encode($m1); + my $fn = $ft->filename; + gzip(\$json => $fn) or die "gzip: $GzipError"; + } + PublicInbox::LeiMirror::ft_rename($ft, $mf, 0666); } - rename($fn, $mf) or die "E: rename($fn, $mf): $!\n"; - $ft->unlink_on_destroy(0); + $lei->child_error($xit << 8) if $fp2 && $xit; } 1;