use warnings;
use File::Temp qw();
use Fcntl qw(SEEK_SET);
-use File::Path qw(make_path);
-use PublicInbox::Git qw(git_unquote);
+use PublicInbox::Git qw(git_unquote git_quote);
use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::MsgIter qw(msg_iter msg_part_text);
use URI::Escape qw(uri_escape_utf8);
}
# look for existing blobs already in git repos
-sub solve_existing ($$) {
- my ($self, $want) = @_;
+sub solve_existing ($$$) {
+ my ($self, $out, $want) = @_;
+ my $oid_b = $want->{oid_b};
+ my @ambiguous; # Array of [ git, $oids]
foreach my $git (@{$self->{gits}}) {
- my ($oid_full, $type, $size) = $git->check($want->{oid_b});
+ my ($oid_full, $type, $size) = $git->check($oid_b);
if (defined($type) && $type eq 'blob') {
return [ $git, $oid_full, $type, int($size) ];
}
+
+ next if length($oid_b) == 40;
+
+ # parse stderr of "git cat-file --batch-check"
+ my $err = $git->last_check_err;
+ my (@oids) = ($err =~ /\b([a-f0-9]{40})\s+blob\b/g);
+ next unless scalar(@oids);
+
+ # TODO: do something with the ambiguous array?
+ # push @ambiguous, [ $git, @oids ];
+
+ print $out "`$oid_b' ambiguous in ",
+ join("\n", $git->pub_urls), "\n",
+ join('', map { "$_ blob\n" } @oids), "\n";
}
- undef;
+ scalar(@ambiguous) ? \@ambiguous : undef;
}
-# returns a hashref with information about a diff:
+# returns a hashref with information about a diff ($di):
# {
# oid_a => abbreviated pre-image oid,
# oid_b => abbreviated post-image oid,
my ($s, undef) = msg_part_text($part, $ct);
defined $s or return;
my $di = {};
+
+ # Email::MIME::Encodings forces QP to be CRLF upon decoding,
+ # change it back to LF:
+ my $cte = $part->header('Content-Transfer-Encoding') || '';
+ if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
+ $s =~ s/\r\n/\n/sg;
+ }
+
foreach my $l (split(/^/m, $s)) {
- if ($l =~ /$re/) {
+ if ($l =~ $re) {
$di->{oid_a} = $1;
$di->{oid_b} = $2;
- my $mode_a = $3;
- if ($mode_a =~ /\A(?:100644|120000|100755)\z/) {
- $di->{mode_a} = $mode_a;
+ if (defined($3)) {
+ my $mode_a = $3;
+ if ($mode_a =~ /\A(?:100644|120000|100755)\z/) {
+ $di->{mode_a} = $mode_a;
+ }
}
# start writing the diff out to a tempfile
open($tmp, '+>', undef) or die "open(tmp): $!";
$di->{tmp} = $tmp;
- $di->{hdr_lines} = $hdr_lines;
- print $tmp @$hdr_lines, $l or die "print(tmp): $!";
+ push @$hdr_lines, $l;
+ $di->{hdr_lines} = $hdr_lines;
+ print $tmp @$hdr_lines or die "print(tmp): $!";
# for debugging/diagnostics:
$di->{ibx} = $ibx;
my ($path_a, $path_b) = ($1, $2);
+ # diff header lines won't have \r because git
+ # will quote them, but Email::MIME gives CRLF
+ # for quoted-printable:
+ $path_b =~ tr/\r//d;
+
# don't care for leading 'a/' and 'b/'
my (undef, @a) = split(m{/}, git_unquote($path_a));
my (undef, @b) = split(m{/}, git_unquote($path_b));
print $tmp $l or die "print(tmp): $!";
} elsif ($hdr_lines) {
push @$hdr_lines, $l;
+ if ($l =~ /\Anew file mode (100644|120000|100755)$/) {
+ $di->{mode_a} = $1;
+ }
}
}
$tmp ? $di : undef;
my $wt = File::Temp->newdir('solver.wt-XXXXXXXX', TMPDIR => 1);
my $dir = $wt->dirname;
- foreach (qw(objects/info refs/heads)) {
- make_path("$dir/.git/$_") or die "make_path $_: $!";
+ foreach ('', qw(objects refs objects/info refs/heads)) {
+ mkdir("$dir/.git/$_") or die "mkdir $_: $!";
}
open my $fh, '>', "$dir/.git/config" or die "open .git/config: $!";
print $fh <<'EOF' or die "print .git/config $!";
my $f = '.git/objects/info/alternates';
open $fh, '>', "$dir/$f" or die "open: $f: $!";
- foreach my $git (@{$self->{gits}}) {
- print $fh "$git->{git_dir}/objects\n" or die "print $f: $!";
- }
+ print($fh (map { "$_->{git_dir}/objects\n" } @{$self->{gits}})) or
+ die "print $f: $!";
close $fh or die "close: $f: $!";
$wt;
}
$? == 0 or die "$msg failed: $?";
}
-sub prepare_wt ($$$) {
- my ($wt_dir, $existing, $di) = @_;
+sub prepare_index ($$$$) {
+ my ($out, $wt_dir, $existing, $di) = @_;
my $oid_full = $existing->[1];
my ($r, $w);
my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
my $mode_a = $di->{mode_a} || extract_old_mode($di);
- my @git = (qw(git -C), $wt_dir);
+ # unlike git-apply(1), this only gets called once in a patch
+ # series and happens too quickly to be worth making async:
pipe($r, $w) or die "pipe: $!";
my $rdr = { 0 => fileno($r) };
- my $pid = spawn([@git, qw(update-index -z --index-info)], {}, $rdr);
+ my $pid = spawn([qw(git -C), $wt_dir,
+ qw(update-index -z --index-info)], undef, $rdr);
close $r or die "close pipe(r): $!";
print $w "$mode_a $oid_full\t$path_a\0" or die "print update-index: $!";
+
close $w or die "close update-index: $!";
reap($pid, 'update-index -z --index-info');
- $pid = spawn([@git, qw(checkout-index -a -f -u)]);
- reap($pid, 'checkout-index -a -f -u');
+ print $out "index prepared:\n",
+ "$mode_a $oid_full\t", git_quote($path_a), "\n";
}
-sub do_apply ($$$$) {
- my ($out, $wt_git, $wt_dir, $di) = @_;
+sub do_apply_begin ($$$) {
+ my ($out, $wt_dir, $di) = @_;
- my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_info($di);
+ my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_url($di);
$tmp->flush or die "tmp->flush failed: $!";
$out->flush or die "err->flush failed: $!";
sysseek($tmp, 0, SEEK_SET) or die "sysseek(tmp) failed: $!";
defined(my $err_fd = fileno($out)) or die "fileno(out): $!";
my $rdr = { 0 => fileno($tmp), 1 => $err_fd, 2 => $err_fd };
+
+ # we need --ignore-whitespace because some patches are CRLF
my $cmd = [ qw(git -C), $wt_dir,
- qw(apply --whitespace=warn -3 --verbose) ];
- reap(spawn($cmd, undef, $rdr), 'apply');
+ qw(apply --cached --ignore-whitespace
+ --whitespace=warn --verbose) ];
+ spawn($cmd, undef, $rdr);
+}
- local $/ = "\0";
- my $rd = popen_rd([qw(git -C), $wt_dir, qw(ls-files -s -z)]);
+sub do_apply_continue ($$) {
+ my ($wt_dir, $apply_pid) = @_;
+ reap($apply_pid, 'apply');
+ popen_rd([qw(git -C), $wt_dir, qw(ls-files -s -z)]);
+}
+
+sub do_apply_end ($$$$) {
+ my ($out, $wt_git, $rd, $di) = @_;
+ local $/ = "\0";
defined(my $line = <$rd>) or die "failed to read ls-files: $!";
chomp $line or die "no trailing \\0 in [$line] from ls-files";
$file eq $di->{path_b} or
die "index mismatch: file=$file != path_b=$di->{path_b}";
- my $abs_path = "$wt_dir/$file";
- -r $abs_path or die "WT_DIR/$file not readable";
- my $size = -s _;
- print $out "OK $mode_b $oid_b_full $stage\t$file\n";
+ my (undef, undef, $size) = $wt_git->check($oid_b_full);
+
+ defined($size) or die "failed to read_size from $oid_b_full";
+
+ print $out "$mode_b $oid_b_full\t$file\n";
[ $wt_git, $oid_b_full, 'blob', $size, $di ];
}
# can have different HTTP_HOST on the same instance.
my $url = $di->{ibx}->base_url;
my $mid = $di->{smsg}->{mid};
- defined($url) ? "<$url/$mid/>" : "<$mid>";
+ defined($url) ? "$url$mid/" : "<$mid>";
}
-sub apply_patches ($$$$$) {
- my ($self, $out, $wt, $found, $patches) = @_;
+# reconstruct the oid_b blob using patches we found:
+sub apply_patches_cb ($$$$$) {
+ my ($self, $out, $found, $patches, $oid_b) = @_;
+
+ my $tot = scalar(@$patches) or return sub {
+ print $out "no patch(es) for $oid_b\n";
+ undef;
+ };
+
+ my $wt = do_git_init_wt($self);
my $wt_dir = $wt->dirname;
my $wt_git = PublicInbox::Git->new("$wt_dir/.git");
$wt_git->{-wt} = $wt;
my $cur = 0;
- my $tot = scalar @$patches;
+ my ($apply_pid, $rd, $di);
+
+ # returns an empty string if in progress, undef if not found,
+ # or the final [ ::Git, oid_full, type, size, $di ] arrayref
+ # if found
+ sub {
+ if ($rd) {
+ $found->{$di->{oid_b}} =
+ do_apply_end($out, $wt_git, $rd, $di);
+ $rd = undef;
+ # continue to shift @$patches
+ } elsif ($apply_pid) {
+ $rd = do_apply_continue($wt_dir, $apply_pid);
+ $apply_pid = undef;
+ return ''; # $rd => do_apply_ned
+ }
+
+ # may return undef here
+ $di = shift @$patches or return $found->{$oid_b};
- foreach my $di (@$patches) {
my $i = ++$cur;
my $oid_a = $di->{oid_a};
my $existing = $found->{$oid_a};
my $empty_oid = $oid_a =~ /\A0+\z/;
- if ($empty_oid && $i != 0) {
+ if ($empty_oid && $i != 1) {
die "empty oid at [$i/$tot] ", di_url($di);
}
if (!$existing && !$empty_oid) {
# prepare the worktree for patch application:
if ($i == 1 && $existing) {
- prepare_wt($wt_dir, $existing, $di);
- }
- unless (-f "$wt_dir/$di->{path_a}") {
- die "missing $di->{path_a} at [$i/$tot] ", di_url($di);
+ prepare_index($out, $wt_dir, $existing, $di);
}
- print $out "applying [$i/$tot] ", di_url($di), "\n",
+ print $out "\napplying [$i/$tot] ", di_url($di), "\n",
join('', @{$di->{hdr_lines}}), "\n"
or die "print \$out failed: $!";
- # apply the patch!
- $found->{$di->{oid_b}} = do_apply($out, $wt_git, $wt_dir, $di);
- }
-}
-
-sub dump_found ($$) {
- my ($out, $found) = @_;
- foreach my $oid (sort keys %$found) {
- my ($git, $oid, $di) = @{$found->{$oid}};
- my $loc = $di ? di_info($di) : $git->src_blob_url($oid);
- print $out "$oid from $loc\n";
- }
-}
-
-sub dump_patches ($$) {
- my ($out, $patches) = @_;
- my $tot = scalar(@$patches);
- my $i = 0;
- foreach my $di (@$patches) {
- ++$i;
- print $out "[$i/$tot] ", di_url($di), "\n";
+ # begin the patch application patch!
+ $apply_pid = do_apply_begin($out, $wt_dir, $di);
+ # next call to this callback will call do_apply_continue
+ '';
}
}
# recreate $oid_b
-# Returns a 2-element array ref: [ PublicInbox::Git object, oid_full ]
+# Returns an array ref: [ ::Git object, oid_full, type, size, di ]
# or undef if nothing was found.
+#
+# TODO: complete the migration of this and ViewVCS into an evented
+# model for fairness
sub solve ($$$$) {
my ($self, $out, $oid_b, $hints) = @_;
my $req = { %$hints, oid_b => $oid_b };
my @todo = ($req);
- my $found = {}; # { oid_abbrev => [ PublicInbox::Git, oid_full, $di ] }
+ my $found = {}; # { abbrev => [ ::Git, oid_full, type, size, $di ] }
my $patches = []; # [ array of $di hashes ]
+ my $max = $self->{max_patches} || 200;
+ my $apply_cb;
+ my $cb = sub {
+ my $want = pop @todo;
+ unless ($want) {
+ $apply_cb ||= apply_patches_cb($self, $out, $found,
+ $patches, $oid_b);
+ return $apply_cb->();
+ }
- my $max = $self->{max_steps} || 200;
- my $steps = 0;
-
- while (defined(my $want = pop @todo)) {
+ if (scalar(@$patches) > $max) {
+ print $out "Aborting, too many steps to $oid_b\n";
+ return;
+ }
# see if we can find the blob in an existing git repo:
- if (my $existing = solve_existing($self, $want)) {
- my $want_oid = $want->{oid_b};
- return $existing if $want_oid eq $oid_b; # DONE!
+ my $want_oid = $want->{oid_b};
+ if (my $existing = solve_existing($self, $out, $want)) {
+ print $out "found $want_oid in ",
+ join("\n", $existing->[0]->pub_urls), "\n";
+ return $existing if $want_oid eq $oid_b; # DONE!
$found->{$want_oid} = $existing;
- next; # ok, one blob resolved, more to go?
+ return ''; # ok, one blob resolved, more to go?
}
# scan through inboxes to look for emails which results in
# the oid we want:
+ my $di;
foreach my $ibx (@{$self->{inboxes}}) {
- my $di = find_extract_diff($self, $ibx, $want) or next;
+ $di = find_extract_diff($self, $ibx, $want) or next;
unshift @$patches, $di;
+ print $out "found $want_oid in ",di_url($di),"\n";
# good, we can find a path to the oid we $want, now
# lets see if we need to apply more patches:
my $src = $di->{oid_a};
- if ($src !~ /\A0+\z/) {
- if (++$steps > $max) {
- print $out
-"Aborting, too many steps to $oid_b\n";
- return;
- }
+ last if $src =~ /\A0+\z/;
- # we have to solve it using another oid, fine:
- my $job = {
- oid_b => $src,
- path_b => $di->{path_a},
- };
- push @todo, $job;
- }
+ # we have to solve it using another oid, fine:
+ my $job = { oid_b => $src, path_b => $di->{path_a} };
+ push @todo, $job;
last; # onto the next @todo item
}
- }
-
- unless (scalar(@$patches)) {
- print $out "no patch(es) for $oid_b\n";
- dump_found($out, $found);
- return;
- }
-
- # reconstruct the oid_b blob using patches we found:
- eval {
- my $wt = do_git_init_wt($self);
- apply_patches($self, $out, $wt, $found, $patches);
+ unless ($di) {
+ print $out "$want_oid could not be found\n";
+ return;
+ }
+ ''; # continue onto next @todo item;
};
- if ($@) {
- print $out "E: $@\nfound: ";
- dump_found($out, $found);
- print $out "patches: ";
- dump_patches($out, $patches);
- return;
- }
- $found->{$oid_b};
+ while (1) {
+ my $ret = eval { $cb->() };
+ unless (defined($ret)) {
+ print $out "E: $@\n" if $@;
+ return;
+ }
+ return $ret if ref($ret);
+ # $ret == ''; so continue looping here
+ }
}
1;