# "lei add-external --mirror" support (also "public-inbox-clone");
package PublicInbox::LeiMirror;
-use strict;
-use v5.10.1;
+use v5.12;
use parent qw(PublicInbox::IPC);
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use IO::Compress::Gzip qw(gzip $GzipError);
# for old installations without manifest.js.gz
sub try_scrape {
- my ($self) = @_;
+ my ($self, $fallback_manifest) = @_;
my $uri = URI->new($self->{src});
my $lei = $self->{lei};
my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
- my $cmd = $curl->for_uri($lei, $uri, '--compressed');
+ my $cmd = $curl->for_uri($lei, $uri, qw(-f --compressed));
my $opt = { 0 => $lei->{0}, 2 => $lei->{2} };
my $fh = popen_rd($cmd, undef, $opt);
my $html = do { local $/; <$fh> } // die "read(curl $uri): $!";
close($fh) or return $lei->child_error($?, "@$cmd failed");
# we grep with URL below, we don't want Subject/From headers
- # making us clone random URLs
+ # making us clone random URLs. This assumes remote instances
+ # prior to public-inbox 1.7.0; see commit 5b96edcb1e0d8252
+ # (www: move mirror instructions to /text/, 2021-08-28)
my @html = split(/<hr>/, $html);
my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g);
+ if (!@urls && $fallback_manifest) {
+ warn <<EOM;
+W: failed to extract URLs from $uri, trying manifest.js.gz...
+EOM
+ return start_clone_url($self);
+ }
my $url = $uri->as_string;
chop($url) eq '/' or die "BUG: $uri not canonicalized";
my ($n) = (m!/([0-9]+)\z!);
$n => [ URI->new($_), '' ]
} @v2_urls; # uniq
- clone_v2($self, \%v2_epochs);
- reap_live() while keys(%$LIVE);
+ clone_v2_prep($self, \%v2_epochs);
+ delete local $lei->{opt}->{epoch};
+ clone_all($self);
return;
}
($lei->{opt}->{jobs} // 1) > 1;
push @cmd, '-v' if $lei->{opt}->{verbose};
# XXX any other options to support?
- # --reference is tricky with multiple epochs...
+ # --reference is tricky with multiple epochs, but handled
+ # automatically if using manifest.js.gz
@cmd;
}
my $f = (split(m!/!, $endpoint))[-1];
my $ft = File::Temp->new(TEMPLATE => "$f-XXXX", TMPDIR => 1);
my $opt = { 0 => $lei->{0}, 1 => $lei->{1}, 2 => $lei->{2} };
- my $cmd = $self->{curl}->for_uri($lei, $uri, qw(--compressed -R -o),
+ my $cmd = $self->{curl}->for_uri($lei, $uri, qw(-f --compressed -R -o),
$ft->filename);
my $jobs = $lei->{opt}->{jobs} // 1;
reap_live() while keys(%$LIVE) >= $jobs;
PublicInbox::Admin::progress_prepare($opt, $lei->{2});
PublicInbox::Admin::index_inbox($ibx, undef, $opt);
}
- return if defined $self->{cur_dst};
+ return if defined $self->{cur_dst}; # one of many repos to clone
open my $x, '>', "$self->{dst}/mirror.done"; # for _wq_done_wait
}
push @$cmd, '--reference', "$self->{dst}$ref";
start_clone($self, $cmd, $opt, $fini);
- $lei->{opt}->{'inbox-config'} =~ /\A(?:always|v1)\z/s and
+ if (!$self->{-is_epoch} && $lei->{opt}->{'inbox-config'} =~
+ /\A(?:always|v1)\z/s) {
_get_txt_start($self, '_/text/config/raw', $fini);
+ }
my $d = $self->{-ent} ? $self->{-ent}->{description} : undef;
$self->{'txt.description'} = $d if defined $d;
die "rename($f, $o/info/alternates): $!";
$f->unlink_on_destroy(0);
}
+ return if $self->{-is_epoch};
write_makefile($dst, 1);
index_cloned_inbox($self, 1);
}
my $mg = PublicInbox::MultiGit->new($dst, 'all.git', 'git');
$mg->fill_alternates;
for my $i ($mg->git_epochs) { $mg->epoch_cfg_set($i) }
- my $entries = delete($self->{-entv}) // [];
- while (@$entries) {
- my ($edst, $ent) = splice(@$entries);
- if (defined(my $o = $ent->{owner})) {
- run_die [qw(git config -f), "$edst/config",
- 'gitweb.owner', $o];
- }
- my $d = $ent->{description} // next;
- my $fn = "$edst/description";
- open my $fh, '>', $fn or die "open($fn): $!";
- print $fh $d, "\n" or die "print($fn): $!";
- close $fh or die "close($fn): $!";
- }
for my $edst (@{delete($self->{-read_only}) // []}) {
my @st = stat($edst) or die "stat($edst): $!";
chmod($st[2] & 0555, $edst) or die "chmod(a-w, $edst): $!";
}
}
-sub clone_v2 ($$;$) {
+sub clone_v2_prep ($$;$) {
my ($self, $v2_epochs, $m) = @_; # $m => manifest.js.gz hashref
my $lei = $self->{lei};
my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
my $dst = $self->{cur_dst} // $self->{dst};
my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs);
my $task = $m ? bless { %$self }, __PACKAGE__ : $self;
- my (@src_edst, @skip, $desc);
+ delete $task->{todo}; # $self->{todo} still exists
+ my (@src_edst, @skip, $desc, @entv);
for my $nr (sort { $a <=> $b } keys %$v2_epochs) {
my ($uri, $key) = @{$v2_epochs->{$nr}};
my $src = $uri->as_string;
$1 + 0 == $nr or die "BUG: <$uri> miskeyed $1 != $nr";
$edst .= "/git/$nr.git";
- my $ent = $m->{$key} // die "BUG: `$key' not in manifest.js.gz";
- if (defined(my $d = $ent->{description})) {
- $d =~ s/ \[epoch [0-9]+\]\z//s;
- $desc = $d;
+ my $ent;
+ if ($m) {
+ $ent = $m->{$key} //
+ die("BUG: `$key' not in manifest.js.gz");
+ if (defined(my $d = $ent->{description})) {
+ $d =~ s/ \[epoch [0-9]+\]\z//s;
+ $desc = $d;
+ }
}
if (!$want || $want->{$nr}) {
push @src_edst, $src, $edst;
- push @{$task->{-entv}}, $edst, $ent;
+ push @entv, $edst, $ent;
+ $self->{any_want}->{$key} = 1;
} else { # create a placeholder so users only need to chmod +w
init_placeholder($src, $edst, $ent);
push @{$task->{-read_only}}, $edst;
}
}
# filter out the epochs we skipped
- $self->{-culled_manifest} = 1 if delete(@$m{@skip});
+ $self->{-culled_manifest} = 1 if $m && delete(@$m{@skip});
(!$self->{dry_run} && !-d $dst) and File::Path::mkpath($dst);
defined($desc) ? ($task->{'txt.description'} = $desc) :
_get_txt_start($task, 'description', $fini);
-
- my @cmd = clone_cmd($lei, my $opt = {});
- while (@src_edst && !$lei->{child_error}) {
- my $cmd = [ @$pfx, @cmd, splice(@src_edst, 0, 2) ];
- start_clone($self, $cmd, $opt, $fini);
+ while (@entv) {
+ my ($edst, $ent) = splice(@entv, 0, 2);
+ my $etask = bless { %$task }, __PACKAGE__;
+ $etask->{-ent} = $ent; # may have {reference}
+ $etask->{cur_src} = shift @src_edst // die 'BUG: no cur_src';
+ $etask->{cur_dst} = shift @src_edst // die 'BUG: no cur_dst';
+ $etask->{cur_dst} eq $edst or
+ die "BUG: `$etask->{cur_dst}' != `$edst'";
+ $etask->{-is_epoch} = $fini;
+ push @{$self->{todo}->{($ent->{reference} // '')}}, $etask;
}
}
}
sub clone_all {
- my ($self, $todo, $m) = @_;
+ my ($self, $m) = @_;
+ my $todo = delete $self->{todo};
+ my $nodep = delete $todo->{''};
+
+ # do not download unwanted deps
+ my $any_want = delete $self->{any_want};
+ my @unwanted = grep { !$any_want->{$_} } keys %$todo;
+ my @nodep = delete(@$todo{@unwanted});
+ push(@$nodep, @$_) for @nodep;
+
# handle no-dependency repos, first
- for (@{delete($todo->{''}) // []}) {
+ for (@$nodep) {
clone_v1($_, 1);
return if $self->{lei}->{child_error};
}
# resolve references, deepest, first:
while (scalar keys %$todo) {
for my $x (keys %$todo) {
- my $nr;
+ my ($nr, $nxt);
# resolve multi-level references
- while (defined(my $nxt = $m->{$x}->{reference})) {
+ while ($m && defined($nxt = $m->{$x}->{reference})) {
exists($todo->{$nxt}) or last;
die <<EOM if ++$nr > 1000;
E: dependency loop detected (`$x' => `$nxt')
last; # restart %$todo iteration
}
}
+ reap_live() while keys(%$LIVE);
}
# FIXME: this gets confused by single inbox instance w/ global manifest.js.gz
my $ft = File::Temp->new(TEMPLATE => '.manifest-XXXX',
UNLINK => 1, TMPDIR => 1, SUFFIX => '.tmp');
my $fn = $ft->filename;
- my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $fn);
+ my $cmd = $curl->for_uri($lei, $uri, qw(-f -R -o), $fn);
my %opt = map { $_ => $lei->{$_} } (0..2);
my $cerr = run_reap($lei, $cmd, \%opt);
- local $LIVE;
if ($cerr) {
return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing
return $lei->child_error($cerr, "@$cmd failed");
my ($path_pfx, $n, $multi) = multi_inbox($self, \$path, $m);
return $lei->child_error(1, $multi) if !ref($multi);
my $v2 = delete $multi->{v2};
+ local $self->{todo} = {};
if ($v2) {
for my $name (sort keys %$v2) {
my $epochs = delete $v2->{$name};
index($self->{cur_dst}, "\n") >= 0 and die <<EOM;
E: `$self->{cur_dst}' must not contain newline
EOM
- clone_v2($self, \%v2_epochs, $m);
+ clone_v2_prep($self, \%v2_epochs, $m);
return if $self->{lei}->{child_error};
}
}
if (my $v1 = delete $multi->{v1}) {
- delete local $lei->{opt}->{epoch} if defined($v2);
my $p = $path_pfx.$path;
chop($p) if substr($p, -1, 1) eq '/';
$uri->path($p);
- my $todo = {};
- my %want = map { $_ => 1 } @$v1;
for my $name (@$v1) {
my $task = bless { %$self }, __PACKAGE__;
$task->{-ent} = $m->{$name} //
EOM
$task->{cur_src} .= '/';
my $dep = $task->{-ent}->{reference} // '';
- $dep = '' if !$want{$dep};
- push @{$todo->{$dep}}, $task;
+ push @{$self->{todo}->{$dep}}, $task; # for clone_all
+ $self->{any_want}->{$name} = 1;
}
- clone_all($self, $todo, $m);
}
- reap_live() while keys(%$LIVE);
+ delete local $lei->{opt}->{epoch} if defined($v2);
+ clone_all($self, $m);
return if $self->{lei}->{child_error} || $self->{dry_run};
- if (delete $self->{-culled_manifest}) { # set by clone_v2/-I/--exclude
+ # set by clone_v2_prep/-I/--exclude
+ if (delete $self->{-culled_manifest}) {
# write the smaller manifest if epochs were skipped so
# users won't have to delete manifest if they +w an
# epoch they no longer want to skip
$ic =~ /\A(?:v1|v2|always|never)\z/s or die <<"";
--inbox-config must be one of `always', `v2', `v1', or `never'
- my $iv = $lei->{opt}->{'inbox-version'};
- if (defined $iv) {
- local $LIVE;
- return clone_v1($self) if $iv == 1;
- return try_scrape($self) if $iv == 2;
- die "bad --inbox-version=$iv\n";
- }
- return start_clone_url($self) if $self->{src} =~ m!://!;
- die "TODO: cloning local directories not supported, yet";
+ local $LIVE;
+ my $iv = $lei->{opt}->{'inbox-version'} //
+ return start_clone_url($self);
+ return clone_v1($self) if $iv == 1;
+ die "bad --inbox-version=$iv\n" if $iv != 2;
+ die <<EOM if $self->{src} !~ m!://!;
+cloning local v2 inboxes not supported
+EOM
+ try_scrape($self, 1);
};
$lei->fail($@) if $@;
}