# headroom into this.
use POSIX qw(sysconf _SC_ARG_MAX);
my $ARG_SIZE_MAX = (sysconf(_SC_ARG_MAX) || 4096) - 2048;
+my $OID_MIN = 7;
# By default, "git format-patch" generates filenames with a four-digit
# prefix, so that means 9999 patch series are OK, right? :>
push @$hdr_lines, $l;
$di->{hdr_lines} = $hdr_lines;
+ utf8::encode($_) for @$hdr_lines;
print $tmp @$hdr_lines or die "print(tmp): $!";
# for debugging/diagnostics:
$di->{ibx} = $ibx;
$di->{smsg} = $smsg;
- } elsif ($l =~ m!\Adiff --git ("?a/.+) ("?b/.+)$!) {
+ } elsif ($l =~ m!\Adiff --git ("?[^/]+/.+) ("?[^/]+/.+)$!) {
last if $tmp; # got our blob, done!
my ($path_a, $path_b) = ($1, $2);
$di->{path_b} = join('/', @b);
$hdr_lines = [ $l ];
} elsif ($tmp) {
+ utf8::encode($l);
print $tmp $l or die "print(tmp): $!";
} elsif ($hdr_lines) {
push @$hdr_lines, $l;
# True when the given path is defined and made up only of word
# characters, "/", ".", space and "-".  Callers use this to decide
# whether a path may be added to a filename search query.
sub path_searchable ($) {
    my ($path) = @_;
    return defined($path) && $path =~ m!\A[\w/\. \-]+\z!;
}
# ".." appears in path names, which confuses Xapian into treating
# it as a range query.  So we split on ".." since Xapian breaks
# on punctuation anyway, and emit one dfn:"..." phrase per piece.
#
# Takes one path string and returns a query fragment in which every
# phrase is preceded by a single space, so the result can be appended
# directly to an existing query string.  Empty pieces (the path starts
# with "..", or contains "....") are skipped rather than turned into
# an empty dfn:"" phrase.
sub filename_query ($) {
    my ($path) = @_;
    my @pieces = grep { length } split(/\.\./, $path);
    return join('', map { qq( dfn:"$_") } @pieces);
}
+
sub find_extract_diff ($$$) {
my ($self, $ibx, $want) = @_;
my $srch = $ibx->search or return;
my $path_b = $want->{path_b};
if (path_searchable($path_b)) {
- $q .= qq{ dfn:"$path_b"};
+ $q .= filename_query($path_b);
my $path_a = $want->{path_a};
if (path_searchable($path_a) && $path_a ne $path_b) {
- $q .= qq{ dfn:"$path_a"};
+ $q .= filename_query($path_a);
}
}
my $f = 'objects/info/alternates';
open $fh, '>', "$git_dir/$f" or die "open: $f: $!";
- print($fh (map { "$_->{git_dir}/objects\n" } @{$self->{gits}})) or
- die "print $f: $!";
+ foreach my $git (@{$self->{gits}}) {
+ print $fh $git->git_path('objects'),"\n" or die "print $f: $!";
+ }
close $fh or die "close: $f: $!";
my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir);
$tmp_git->{-tmp} = $self->{tmp};
# Returns the file mode named by an "old mode NNNNNN" line among the
# diff header lines in $di->{hdr_lines}.  Only 100644, 100755 and
# 120000 are recognized; '100644' is returned when no such line is
# present.
sub extract_old_mode ($) {
    my ($di) = @_;
    # hdr_lines is seeded with the "diff --git" line, so "old mode" is
    # never at the very start of the joined string; /m is required for
    # ^ to match at the beginning of each header line.
    if (join('', @{$di->{hdr_lines}}) =~
            /^old mode (100644|100755|120000)\b/m) {
        return $1;
    }
    return '100644';
}
# Hands the final result to $user_cb and returns whatever it returns.
#
# $self->{found} is consulted first under the exact $self->{oid_want}
# key.  Failing that, $self->{tmp_git}->check is asked about oid_want
# and the first element of its result is used as the key into found.
# If neither yields anything, any last_check_err is passed to dbg()
# and the callback receives undef.
sub do_finish ($$) {
    my ($self, $user_cb) = @_;
    my ($found, $oid_want) = @$self{qw(found oid_want)};

    my $exists = $found->{$oid_want};
    return $user_cb->($exists) if $exists;

    # let git disambiguate if oid_want was too short,
    # but long enough to be unambiguous:
    my $tmp_git = $self->{tmp_git};
    my @res = $tmp_git->check($oid_want);
    return $user_cb->($found->{$res[0]}) if @res;

    my $err = $tmp_git->last_check_err;
    dbg($self, $err) if $err;
    return $user_cb->(undef);
}
+
sub do_step ($) {
my ($self) = @_;
eval {
# our result: (which may be undef)
# Other steps may call user_cb to terminate prematurely
# on error
- } elsif (my $ucb = delete($self->{user_cb})) {
- $ucb->($self->{found}->{$self->{oid_want}});
+ } elsif (my $user_cb = delete($self->{user_cb})) {
+ do_finish($self, $user_cb);
} else {
die 'about to call user_cb twice'; # Oops :x
}
# Records $found_info in $self->{found} under $oid, and also under
# the object ID stored at $found_info->[1] and each successively
# shorter prefix of it.  Prefix registration stops once the prefix
# equals $oid or is no longer than $OID_MIN characters.
sub mark_found ($$$) {
    my ($self, $oid, $found_info) = @_;
    my $found = $self->{found};
    $found->{$oid} = $found_info;

    my $abbr = $found_info->[1];
    until ($abbr eq $oid || length($abbr) <= $OID_MIN) {
        $found->{$abbr} = $found_info;
        chop($abbr);    # drop the last character and try again
    }
}
sub parse_ls_files ($$$$) {
my $patches = $self->{patches};
# we need --ignore-whitespace because some patches are CRLF
- my @cmd = qw(git apply --cached --ignore-whitespace
- --whitespace=warn --verbose);
+ my @cmd = (qw(git -C), $dn, qw(apply --cached --ignore-whitespace
+ --whitespace=warn --verbose));
my $len = length(join(' ', @cmd));
my $total = $self->{tot};
my $di; # keep track of the last one for "git ls-files"
$di = shift @$patches;
dbg($self, "\napplying [$i/$total] " . di_url($self, $di) .
"\n" . join('', @{$di->{hdr_lines}}));
- my $pn = $total + 1 - $i;
- my $path = "$dn/$pn";
+ my $path = $total + 1 - $i;
$len += length($path) + 1;
push @cmd, $path;
} while (@$patches && $len < $ARG_SIZE_MAX);
# see if we can find the blob in an existing git repo:
my $cur_want = $want->{oid_b};
+ if ($self->{seen_oid}->{$cur_want}++) {
+ die "Loop detected solving $cur_want\n";
+ }
if (my $existing = solve_existing($self, $want)) {
dbg($self, "found $cur_want in " .
join("\n", $existing->[0]->pub_urls));
}
return next_step($self); # onto the next todo item
}
+ if (length($cur_want) > $OID_MIN) {
+ chop($cur_want);
+ dbg($self, "retrying $want->{oid_b} as $cur_want");
+ $want->{oid_b} = $cur_want;
+ push @{$self->{todo}}, $want;
+ return next_step($self); # retry with shorter abbrev
+ }
+
dbg($self, "could not find $cur_want");
eval { delete($self->{user_cb})->(undef) }; # not found! :<
die "E: $@" if $@;
$self->{oid_want} = $oid_want;
$self->{out} = $out;
+ $self->{seen_oid} = {};
$self->{tot} = 0;
$self->{psgi_env} = $env;
$self->{todo} = [ { %$hints, oid_b => $oid_want } ];
$self->{patches} = []; # [ $di, $di, ... ]
$self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] }
- $self->{tmp} = File::Temp->newdir('solver.tmp-XXXXXXXX', TMPDIR => 1);
+ $self->{tmp} = File::Temp->newdir("solver.$oid_want-XXXXXXXX", TMPDIR => 1);
dbg($self, "solving $oid_want ...");
my $step_cb = step_cb($self);