Documentation/lei-add-external.pod | 4 +++- Documentation/public-inbox-clone.pod | 6 ++++++ lib/PublicInbox/LeiMirror.pm | 31 +++++++++++++++++++------------ script/public-inbox-clone | 2 +- diff --git a/Documentation/lei-add-external.pod b/Documentation/lei-add-external.pod index 7afcad637f7317044a2409c75341dd8f7074d90a..2a131b553b8ef3b611c732d5336932bbc0f646ee 100644 --- a/Documentation/lei-add-external.pod +++ b/Documentation/lei-add-external.pod @@ -75,7 +75,9 @@ Default: C<always> =item --inbox-version=NUM -Force a public-inbox version (must be C<1> or C<2>). +Force a remote public-inbox version (must be C<1> or C<2>). +This is auto-detected by default, and this option exists mainly +for testing. =back diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod index 52c89cfd1d97bffc5e7a6ecf892cb408aa08c457..1c31fbb3345000501ff2b814118d99e43e194f2e 100644 --- a/Documentation/public-inbox-clone.pod +++ b/Documentation/public-inbox-clone.pod @@ -76,6 +76,12 @@ no v1 inboxes are present. Default: C<always> +=item --inbox-version=NUM + +Force a remote public-inbox version (must be C<1> or C<2>). +This is auto-detected by default, and this option exists mainly +for testing. 
+ =item -n =item --dry-run diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 18c825d3ccdc7bf69fe59acadeec165867eddfcd..c3512d439e474d371dcf81d934a66d5561fe64de 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -43,7 +43,7 @@ } # for old installations without manifest.js.gz sub try_scrape { - my ($self) = @_; + my ($self, $fallback_manifest) = @_; my $uri = URI->new($self->{src}); my $lei = $self->{lei}; my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; @@ -54,9 +54,17 @@ my $html = do { local $/; <$fh> } // die "read(curl $uri): $!"; close($fh) or return $lei->child_error($?, "@$cmd failed"); # we grep with URL below, we don't want Subject/From headers - # making us clone random URLs + # making us clone random URLs. This assumes remote instances + # prior to public-inbox 1.7.0 + # 5b96edcb1e0d8252 (www: move mirror instructions to /text/, 2021-08-28) my @html = split(/<hr>/, $html); my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g); + if (!@urls && $fallback_manifest) { + warn <as_string; chop($url) eq '/' or die "BUG: $uri not canonicalized"; @@ -603,7 +611,6 @@ my $fn = $ft->filename; my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $fn); my %opt = map { $_ => $lei->{$_} } (0..2); my $cerr = run_reap($lei, $cmd, \%opt); - local $LIVE; if ($cerr) { return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing return $lei->child_error($cerr, "@$cmd failed"); @@ -698,15 +705,15 @@ my $ic = $lei->{opt}->{'inbox-config'} //= 'always'; $ic =~ /\A(?:v1|v2|always|never)\z/s or die <<""; --inbox-config must be one of `always', `v2', `v1', or `never' - my $iv = $lei->{opt}->{'inbox-version'}; - if (defined $iv) { - local $LIVE; - return clone_v1($self) if $iv == 1; - return try_scrape($self) if $iv == 2; - die "bad --inbox-version=$iv\n"; - } - return start_clone_url($self) if $self->{src} =~ m!://!; - die "TODO: cloning local directories not supported, yet"; + local $LIVE; + my $iv = $lei->{opt}->{'inbox-version'} // + return start_clone_url($self); + return clone_v1($self) if $iv == 1; + die "bad --inbox-version=$iv\n" if $iv != 2; + die <<EOM if $self->{src} !~ m!://!; +cloning local v2 inboxes not supported +EOM + try_scrape($self, 1); }; $lei->fail($@) if $@; } diff --git a/script/public-inbox-clone b/script/public-inbox-clone index 3d980c97da779a3ca2d69489d48ebc357fbd9595..2900f2329549a1797786c5072a28d251e992b6a6 100755 --- a/script/public-inbox-clone +++ b/script/public-inbox-clone @@ -23,7 +23,7 @@ --quiet | -q increase verbosity (may be repeated) -C DIR chdir to specified directory EOF GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@ - inbox-config=s + inbox-config=s inbox-version=i dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help; if ($opt->{help}) { print $help; exit }; require PublicInbox::Admin; # loads Config