X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FFetch.pm;h=7881b402e3f6c2b7d870a115b50f30286a029ab4;hb=77b3dac08f58503e374e98c29b9e0ee7c866c0e5;hp=184b4f26319f1f01ddf846a97ae0a3612bffeb3c;hpb=5d9dcc31959d520d3e7bed3f68871245bc915ca3;p=public-inbox.git diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index 184b4f26..7881b402 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -12,12 +12,14 @@ use PublicInbox::LEI; use PublicInbox::LeiCurl; use PublicInbox::LeiMirror; use File::Temp (); +use PublicInbox::Config; +use IO::Compress::Gzip qw(gzip $GzipError); sub new { bless {}, __PACKAGE__ } -sub fetch_cmd ($$) { +sub fetch_args ($$) { my ($lei, $opt) = @_; - my @cmd = qw(git); + my @cmd; # (git --git-dir=...) to be added by caller $opt->{$_} = $lei->{$_} for (0..2); # we support "-c $key=$val" for arbitrary git config options # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 @@ -41,10 +43,9 @@ sub remote_url ($$) { sub do_manifest ($$$) { my ($lei, $dir, $ibx_uri) = @_; my $muri = URI->new("$ibx_uri/manifest.js.gz"); - my $ft = File::Temp->new(TEMPLATE => 'manifest-XXXX', - UNLINK => 1, DIR => $dir); + my $ft = File::Temp->new(TEMPLATE => 'm-XXXX', + UNLINK => 1, DIR => $dir, SUFFIX => '.tmp'); my $fn = $ft->filename; - my @opt = (qw(-R -o), $fn); my $mf = "$dir/manifest.js.gz"; my $m0; # current manifest.js.gz contents if (open my $fh, '<', $mf) { @@ -53,16 +54,19 @@ sub do_manifest ($$$) { }; $lei->err($@) if $@; } - my $curl_cmd = $lei->{curl}->for_uri($lei, $muri, @opt); - my $opt = {}; + my ($bn) = ($fn =~ m!/([^/]+)\z!); + my $curl_cmd = $lei->{curl}->for_uri($lei, $muri, qw(-R -o), $bn); + my $opt = { -C => $dir }; $opt->{$_} = $lei->{$_} for (0..2); my $cerr = PublicInbox::LeiMirror::run_reap($lei, $curl_cmd, $opt); if ($cerr) { - return [ 404 ] if ($cerr >> 8) == 22; # 404 Missing + return [ 404, $muri ] if ($cerr >> 8) == 22; # 404 Missing $lei->child_error($cerr, "@$curl_cmd failed"); return; } - my $m1 = PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri); + my $m1 = eval { + PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri); + } or return [ 404, $muri ]; my $mdiff = { %$m1 }; # filter out unchanged entries. We check modified, too, since @@ -81,7 +85,7 @@ sub do_manifest ($$$) { } my (undef, $v1_path, @v2_epochs) = PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path); - [ 200, $v1_path, \@v2_epochs, $muri, $ft, $mf ]; + [ 200, $muri, $v1_path, \@v2_epochs, $ft, $mf, $m1 ]; } sub get_fingerprint2 { @@ -96,7 +100,7 @@ sub do_fetch { # main entry point my $ibx_ver; $lei->{curl} //= PublicInbox::LeiCurl->new($lei) or return; my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver); - my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch); + my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch, $skip); if ($ibx_ver == 1) { my $url = remote_url($lei, $dir) // die "E: $dir missing remote.origin.url\n"; @@ -104,18 +108,24 @@ sub do_fetch { # main entry point } else { # v2: require PublicInbox::MultiGit; $mg = PublicInbox::MultiGit->new($dir, 'all.git', 'git'); - my @epochs = $mg->git_epochs; + @epochs = $mg->git_epochs; my ($git_url, $epoch); for my $nr (@epochs) { # try newest epoch, first my $edir = "$dir/git/$nr.git"; + unless (-d $edir && -w _) { # must be writable dir + $skip->{$nr} = 1; + next; + } + next if defined $git_url; if (defined(my $url = remote_url($lei, $edir))) { $git_url = $url; $epoch = $nr; - last; } else { warn "W: $edir missing remote.origin.url\n"; } } + @epochs = grep { !$skip->{$_} } @epochs if $skip; + $skip //= {}; # makes code below easier $git_url or die "Unable to determine git URL\n"; my $inbox_url = $git_url; $inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or @@ -127,21 +137,33 @@ EOM PublicInbox::LeiMirror::write_makefile($dir, $ibx_ver); $lei->qerr("# inbox URL: $ibx_uri/"); my $res = do_manifest($lei, $dir, $ibx_uri) or return; - my ($code, $v1_path, $v2_epochs, $muri, $ft, $mf) = @$res; + my ($code, $muri, $v1_path, $v2_epochs, $ft, $mf, $m1) = @$res; if ($code == 404) { # any pre-manifest.js.gz instances running? Just fetch all # existing ones and unconditionally try cloning the next $v2_epochs = [ map { "$dir/git/$_.git" } @epochs ]; - push @$v2_epochs, "$dir/git/".($epochs[-1] + 1) if @epochs; + if (@epochs) { + my $n = $epochs[-1] + 1; + push @$v2_epochs, "$dir/git/$n.git" if !$skip->{$n}; + } } else { $code == 200 or die "BUG unexpected code $code\n"; } + my $mculled; if ($ibx_ver == 2) { defined($v1_path) and warn <, WTF? EOM - @git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b } - map { my ($nr) = (m!/([0-9]+)\.git\z!g) } @$v2_epochs; + @git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b } map { + my ($nr) = (m!/([0-9]+)\.git\z!g); + $skip->{$nr} ? () : $nr; + } @$v2_epochs; + if ($m1 && scalar keys %$skip) { + my $re = join('|', keys %$skip); + my @del = grep(m!/git/$re\.git\z!, keys %$m1); + delete @$m1{@del}; + $mculled = 1; + } } else { $git_dir[0] = $dir; } @@ -153,9 +175,9 @@ EOM my $cmd; my $opt = {}; # for spawn if (-d $d) { - $opt->{-C} = $d; $fp2->[0] = get_fingerprint2($d) if $fp2; - $cmd = [ @$torsocks, fetch_cmd($lei, $opt) ]; + $cmd = [ @$torsocks, 'git', "--git-dir=$d", + fetch_args($lei, $opt) ]; } else { my $e_uri = $ibx_uri->clone; my ($epath) = ($d =~ m!(/git/[0-9]+\.git)\z!); @@ -182,6 +204,10 @@ EOM for my $i (@new_epoch) { $mg->epoch_cfg_set($i) } if ($ft) { my $fn = $ft->filename; + if ($mculled) { + my $json = PublicInbox::Config->json->encode($m1); + gzip(\$json => $fn) or die "gzip: $GzipError"; + } rename($fn, $mf) or die "E: rename($fn, $mf): $!\n"; $ft->unlink_on_destroy(0); }