scalar(@ambiguous) ? \@ambiguous : undef;
}
-# returns a hashref with information about a diff:
+# returns a hashref with information about a diff ($di):
# {
# oid_a => abbreviated pre-image oid,
# oid_b => abbreviated post-image oid,
my ($s, undef) = msg_part_text($part, $ct);
defined $s or return;
my $di = {};
+
+ # Email::MIME::Encodings forces QP to be CRLF upon decoding,
+ # change it back to LF:
+ my $cte = $part->header('Content-Transfer-Encoding') || '';
+ if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
+ $s =~ s/\r\n/\n/sg;
+ }
+
foreach my $l (split(/^/m, $s)) {
if ($l =~ $re) {
$di->{oid_a} = $1;
push @$hdr_lines, $l;
$di->{hdr_lines} = $hdr_lines;
- print $tmp @$hdr_lines, $l or die "print(tmp): $!";
+ print $tmp @$hdr_lines or die "print(tmp): $!";
# for debugging/diagnostics:
$di->{ibx} = $ibx;
my ($path_a, $path_b) = ($1, $2);
+ # diff header lines won't have \r because git
+ # will quote them, but Email::MIME gives CRLF
+ # for quoted-printable:
+ $path_b =~ tr/\r//d;
+
# don't care for leading 'a/' and 'b/'
my (undef, @a) = split(m{/}, git_unquote($path_a));
my (undef, @b) = split(m{/}, git_unquote($path_b));
$? == 0 or die "$msg failed: $?";
}
-sub prepare_wt ($$$$) {
+sub prepare_index ($$$$) {
my ($out, $wt_dir, $existing, $di) = @_;
my $oid_full = $existing->[1];
my ($r, $w);
my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
my $mode_a = $di->{mode_a} || extract_old_mode($di);
- my @git = (qw(git -C), $wt_dir);
+ # unlike git-apply(1), this only gets called once in a patch
+ # series and happens too quickly to be worth making async:
pipe($r, $w) or die "pipe: $!";
my $rdr = { 0 => fileno($r) };
- my $pid = spawn([@git, qw(update-index -z --index-info)], {}, $rdr);
+ my $pid = spawn([qw(git -C), $wt_dir,
+ qw(update-index -z --index-info)], undef, $rdr);
close $r or die "close pipe(r): $!";
print $w "$mode_a $oid_full\t$path_a\0" or die "print update-index: $!";
close $w or die "close update-index: $!";
reap($pid, 'update-index -z --index-info');
- $pid = spawn([@git, qw(checkout-index -a -f -u)]);
- reap($pid, 'checkout-index -a -f -u');
-
- print $out "Working tree prepared:\n",
+ print $out "index prepared:\n",
"$mode_a $oid_full\t", git_quote($path_a), "\n";
}
-sub do_apply ($$$$) {
- my ($out, $wt_git, $wt_dir, $di) = @_;
+sub do_apply_begin ($$$) {
+ my ($out, $wt_dir, $di) = @_;
my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_url($di);
$tmp->flush or die "tmp->flush failed: $!";
defined(my $err_fd = fileno($out)) or die "fileno(out): $!";
my $rdr = { 0 => fileno($tmp), 1 => $err_fd, 2 => $err_fd };
+
+ # we need --ignore-whitespace because some patches are CRLF
my $cmd = [ qw(git -C), $wt_dir,
- qw(apply --whitespace=warn -3 --verbose) ];
- reap(spawn($cmd, undef, $rdr), 'apply');
+ qw(apply --cached --ignore-whitespace
+ --whitespace=warn --verbose) ];
+ spawn($cmd, undef, $rdr);
+}
- local $/ = "\0";
- my $rd = popen_rd([qw(git -C), $wt_dir, qw(ls-files -s -z)]);
+sub do_apply_continue ($$) {
+ my ($wt_dir, $apply_pid) = @_;
+ reap($apply_pid, 'apply');
+ popen_rd([qw(git -C), $wt_dir, qw(ls-files -s -z)]);
+}
+sub do_apply_end ($$$$) {
+ my ($out, $wt_git, $rd, $di) = @_;
+
+ local $/ = "\0";
defined(my $line = <$rd>) or die "failed to read ls-files: $!";
chomp $line or die "no trailing \\0 in [$line] from ls-files";
$file eq $di->{path_b} or
die "index mismatch: file=$file != path_b=$di->{path_b}";
- my $abs_path = "$wt_dir/$file";
- -r $abs_path or die "WT_DIR/$file not readable";
- my $size = -s _;
- print $out "OK $mode_b $oid_b_full $stage\t$file\n";
+ my (undef, undef, $size) = $wt_git->check($oid_b_full);
+
+ defined($size) or die "failed to read_size from $oid_b_full";
+
+ print $out "$mode_b $oid_b_full\t$file\n";
[ $wt_git, $oid_b_full, 'blob', $size, $di ];
}
# can have different HTTP_HOST on the same instance.
my $url = $di->{ibx}->base_url;
my $mid = $di->{smsg}->{mid};
- defined($url) ? "<$url$mid/>" : "<$mid>";
+ defined($url) ? "$url$mid/" : "<$mid>";
}
-sub apply_patches ($$$$$) {
- my ($self, $out, $wt, $found, $patches) = @_;
+# reconstruct the oid_b blob using patches we found:
+sub apply_patches_cb ($$$$$) {
+ my ($self, $out, $found, $patches, $oid_b) = @_;
+
+ my $tot = scalar(@$patches) or return sub {
+ print $out "no patch(es) for $oid_b\n";
+ undef;
+ };
+
+ my $wt = do_git_init_wt($self);
my $wt_dir = $wt->dirname;
my $wt_git = PublicInbox::Git->new("$wt_dir/.git");
$wt_git->{-wt} = $wt;
my $cur = 0;
- my $tot = scalar @$patches;
+ my ($apply_pid, $rd, $di);
+
+ # returns an empty string if in progress, undef if not found,
+ # or the final [ ::Git, oid_full, type, size, $di ] arrayref
+ # if found
+ sub {
+ if ($rd) {
+ $found->{$di->{oid_b}} =
+ do_apply_end($out, $wt_git, $rd, $di);
+ $rd = undef;
+ # continue to shift @$patches
+ } elsif ($apply_pid) {
+ $rd = do_apply_continue($wt_dir, $apply_pid);
+ $apply_pid = undef;
+ return ''; # $rd => do_apply_ned
+ }
+
+ # may return undef here
+ $di = shift @$patches or return $found->{$oid_b};
- foreach my $di (@$patches) {
my $i = ++$cur;
my $oid_a = $di->{oid_a};
my $existing = $found->{$oid_a};
# prepare the worktree for patch application:
if ($i == 1 && $existing) {
- prepare_wt($out, $wt_dir, $existing, $di);
- }
- if (!$empty_oid && ! -f "$wt_dir/$di->{path_a}") {
- die "missing $di->{path_a} at [$i/$tot] ", di_url($di);
+ prepare_index($out, $wt_dir, $existing, $di);
}
print $out "\napplying [$i/$tot] ", di_url($di), "\n",
join('', @{$di->{hdr_lines}}), "\n"
or die "print \$out failed: $!";
- # apply the patch!
- $found->{$di->{oid_b}} = do_apply($out, $wt_git, $wt_dir, $di);
- }
-}
-
-sub dump_found ($$) {
- my ($out, $found) = @_;
- foreach my $oid (sort keys %$found) {
- my ($git, $oid, undef, undef, $di) = @{$found->{$oid}};
- my $loc = $di ? di_url($di) : $git->src_blob_url($oid);
- print $out "$oid from $loc\n";
- }
-}
-
-sub dump_patches ($$) {
- my ($out, $patches) = @_;
- my $tot = scalar(@$patches);
- my $i = 0;
- foreach my $di (@$patches) {
- ++$i;
- print $out "[$i/$tot] ", di_url($di), "\n";
+ # begin the patch application patch!
+ $apply_pid = do_apply_begin($out, $wt_dir, $di);
+ # next call to this callback will call do_apply_continue
+ '';
}
}
# recreate $oid_b
-# Returns a 2-element array ref: [ PublicInbox::Git object, oid_full ]
+# Returns an array ref: [ ::Git object, oid_full, type, size, di ]
# or undef if nothing was found.
+#
+# TODO: complete the migration of this and ViewVCS into an evented
+# model for fairness
sub solve ($$$$) {
my ($self, $out, $oid_b, $hints) = @_;
my @todo = ($req);
my $found = {}; # { abbrev => [ ::Git, oid_full, type, size, $di ] }
my $patches = []; # [ array of $di hashes ]
+ my $max = $self->{max_patches} || 200;
+ my $apply_cb;
+ my $cb = sub {
+ my $want = pop @todo;
+ unless ($want) {
+ $apply_cb ||= apply_patches_cb($self, $out, $found,
+ $patches, $oid_b);
+ return $apply_cb->();
+ }
- my $max = $self->{max_steps} || 200;
- my $steps = 0;
-
- while (defined(my $want = pop @todo)) {
+ if (scalar(@$patches) > $max) {
+ print $out "Aborting, too many steps to $oid_b\n";
+ return;
+ }
# see if we can find the blob in an existing git repo:
+ my $want_oid = $want->{oid_b};
if (my $existing = solve_existing($self, $out, $want)) {
- my $want_oid = $want->{oid_b};
- if ($want_oid eq $oid_b) { # DONE!
- my @pub_urls = $existing->[0]->pub_urls;
- print $out "found $want_oid in ",
- join("\n", @pub_urls),"\n";
- return $existing;
- }
+ print $out "found $want_oid in ",
+ join("\n", $existing->[0]->pub_urls), "\n";
+ return $existing if $want_oid eq $oid_b; # DONE!
$found->{$want_oid} = $existing;
- next; # ok, one blob resolved, more to go?
+ return ''; # ok, one blob resolved, more to go?
}
# scan through inboxes to look for emails which results in
# the oid we want:
+ my $di;
foreach my $ibx (@{$self->{inboxes}}) {
- my $di = find_extract_diff($self, $ibx, $want) or next;
+ $di = find_extract_diff($self, $ibx, $want) or next;
unshift @$patches, $di;
+ print $out "found $want_oid in ",di_url($di),"\n";
# good, we can find a path to the oid we $want, now
# lets see if we need to apply more patches:
my $src = $di->{oid_a};
- if ($src !~ /\A0+\z/) {
- if (++$steps > $max) {
- print $out
-"Aborting, too many steps to $oid_b\n";
- return;
- }
+ last if $src =~ /\A0+\z/;
- # we have to solve it using another oid, fine:
- my $job = {
- oid_b => $src,
- path_b => $di->{path_a},
- };
- push @todo, $job;
- }
+ # we have to solve it using another oid, fine:
+ my $job = { oid_b => $src, path_b => $di->{path_a} };
+ push @todo, $job;
last; # onto the next @todo item
}
- }
-
- unless (scalar(@$patches)) {
- print $out "no patch(es) for $oid_b\n";
- dump_found($out, $found);
- return;
- }
-
- # reconstruct the oid_b blob using patches we found:
- eval {
- my $wt = do_git_init_wt($self);
- apply_patches($self, $out, $wt, $found, $patches);
+ unless ($di) {
+ print $out "$want_oid could not be found\n";
+ return;
+ }
+ ''; # continue onto next @todo item;
};
- if ($@) {
- print $out "E: $@\nfound: ";
- dump_found($out, $found);
- print $out "patches: ";
- dump_patches($out, $patches);
- return;
- }
- $found->{$oid_b};
+ while (1) {
+ my $ret = eval { $cb->() };
+ unless (defined($ret)) {
+ print $out "E: $@\n" if $@;
+ return;
+ }
+ return $ret if ref($ret);
+ # $ret == ''; so continue looping here
+ }
}
1;