X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLeiMirror.pm;h=1ab5e0d898d26a5a842ffd515b2dd17fe9fd35b4;hb=718d054be8b2cc23635b42e8ce880424492d3a84;hp=53f7dd31c36c2a0512a26dccb69b7481a3c6df3c;hpb=69b5ab18a594fa434d79cacee35c4d9a0df7b55c;p=public-inbox.git
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 53f7dd31..1ab5e0d8 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -6,7 +6,9 @@ package PublicInbox::LeiMirror;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);
+use PublicInbox::Config;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
+use IO::Compress::Gzip qw(gzip $GzipError);
use PublicInbox::Spawn qw(popen_rd spawn run_die);
use File::Temp ();
use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY);
@@ -42,7 +44,8 @@ sub try_scrape {
# we grep with URL below, we don't want Subject/From headers
# making us clone random URLs
- my @urls = ($html =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g);
+ my @html = split(/<hr>/, $html);
+ my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g);
my $url = $uri->as_string;
chop($url) eq '/' or die "BUG: $uri not canonicalized";
@@ -184,7 +187,9 @@ sub run_reap {
my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid);
waitpid($pid, 0) == $pid or die "waitpid @$cmd: $!";
@$reap = (); # cancel reap
- $?
+ my $ret = $?;
+ $? = 0; # don't let it influence normal exit
+ $ret;
}
sub clone_v1 {
@@ -264,14 +269,14 @@ EOM
close $fh or die "close:($f): $!";
}
-sub clone_v2 ($$) {
- my ($self, $v2_epochs) = @_;
+sub clone_v2 ($$;$) {
+ my ($self, $v2_epochs, $m) = @_; # $m => manifest.js.gz hashref
my $lei = $self->{lei};
my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
my $pfx = $curl->torsocks($lei, (values %$v2_epochs)[0]) or return;
my $dst = $self->{dst};
my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs);
- my (@src_edst, @read_only);
+ my (@src_edst, @read_only, @skip_nr);
for my $nr (sort { $a <=> $b } keys %$v2_epochs) {
my $uri = $v2_epochs->{$nr};
my $src = $uri->as_string;
@@ -286,8 +291,15 @@ failed to extract epoch number from $src
} else { # create a placeholder so users only need to chmod +w
init_placeholder($src, $edst);
push @read_only, $edst;
+ push @skip_nr, $nr;
}
}
+ if (@skip_nr) { # filter out the epochs we skipped
+ my $re = join('|', @skip_nr);
+ my @del = grep(m!/git/$re\.git\z!, keys %$m);
+ delete @$m{@del};
+ $self->{-culled_manifest} = 1;
+ }
my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock';
_try_config($self);
my $on_destroy = $lk->lock_for_scope($$);
@@ -358,7 +370,11 @@ sub try_manifest {
return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing
return $lei->child_error($cerr, "@$cmd failed");
}
- my $m = decode_manifest($ft, $fn, $uri);
+ my $m = eval { decode_manifest($ft, $fn, $uri) };
+ if ($@) {
+ warn $@;
+ return try_scrape($self);
+ }
my ($path_pfx, $v1_path, @v2_epochs) = deduce_epochs($m, $path);
if (@v2_epochs) {
# It may be possible to have v1 + v2 in parallel someday:
@@ -372,13 +388,20 @@ EOM
my ($n) = ("$uri" =~ m!/([0-9]+)\.git\z!);
$n => $uri->clone
} @v2_epochs;
- clone_v2($self, \%v2_epochs);
+ clone_v2($self, \%v2_epochs, $m);
} elsif (defined $v1_path) {
clone_v1($self);
} else {
die "E: confused by <$uri>, possible matches:\n\t",
join(', ', sort keys %$m), "\n";
}
+ if (delete $self->{-culled_manifest}) { # set by clone_v2
+ # write the smaller manifest if epochs were skipped so
+ # users won't have to delete manifest if they +w an
+ # epoch they no longer want to skip
+ my $json = PublicInbox::Config->json->encode($m);
+ gzip(\$json => $fn) or die "gzip: $GzipError";
+ }
my $fin = "$self->{dst}/manifest.js.gz";
rename($fn, $fin) or die "E: rename($fn, $fin): $!";
$ft->unlink_on_destroy(0);