# Copyright (C) all contributors
# License: AGPL-3.0+
# show any VCS object, similar to "git show"
#
# This can use a "solver" to reconstruct blobs based on git
# patches (with abbreviated OIDs in the header). However, the
# abbreviated OIDs must match exactly what's in the original
# email (unless a normal code repo already has the blob).
#
# In other words, we can only reliably reconstruct blobs based
# on links generated by ViewDiff (and only if the emailed
# patches apply 100% cleanly to published blobs).
#
# NOTE(review): several string literals and heredocs in this copy of the
# file look truncated (markup-like text appears to have been stripped).
# Those spans are reproduced verbatim below and flagged individually;
# they need to be restored from version control before this compiles.
package PublicInbox::ViewVCS;
use strict;
use v5.10.1;
use File::Temp 0.19 (); # newdir
use PublicInbox::SolverGit;
use PublicInbox::GitAsyncCat;
use PublicInbox::WwwStream qw(html_oneshot);
use PublicInbox::Linkify;
use PublicInbox::Tmpfile;
use PublicInbox::ViewDiff qw(flush_diff);
use PublicInbox::View;
use PublicInbox::Eml;
use Text::Wrap qw(wrap);
use PublicInbox::Hval qw(ascii_html to_filename);

# optional syntax highlighter; $hl stays undef if HlMod cannot be loaded
my $hl = eval {
	require PublicInbox::HlMod;
	PublicInbox::HlMod->new;
};

# query parameter name => solver hint key (consumed by show())
my %QP_MAP = ( A => 'oid_a', a => 'path_a', b => 'path_b' );
our $MAX_SIZE = 1024 * 1024; # TODO: configurable
my $BIN_DETECT = 8000; # same as git

# one field per line, NUL-terminated; split apart in show_commit_start()
my $SHOW_FMT = '--pretty=format:'.join('%n', '%P', '%p', '%H', '%T', '%s', '%f',
	'%an <%ae> %ai', '%cn <%ce> %ci', '%b%x00');

# Build a one-shot HTML response from ($ctx, $code, @body) via html_oneshot.
# If a write callback is stored in $ctx->{-wcb} it is removed and invoked
# with the response; otherwise the response itself is returned.
sub html_page ($$;@) {
	my ($ctx, $code) = @_[0, 1];
	my $wcb = delete $ctx->{-wcb};
	$ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
	my $res = html_oneshot($ctx, $code, @_[2..$#_]);
	$wcb ? $wcb->($res) : $res;
}

# Take the debug log handle out of $ctx->{lh}, rewind it and slurp it.
# Dies if the handle was already taken (this may only be called once per
# $ctx).  Returns a string; seek/read failures are warned about and
# reported through the returned string instead of dying.
# NOTE(review): the slurped $log is never used in the final expression as
# shown here; the returned literals look truncated -- confirm against VCS.
sub dbg_log ($) {
	my ($ctx) = @_;
	my $log = delete $ctx->{lh} // die 'BUG: already captured debug log';
	if (!seek($log, 0, 0)) {
		warn "seek(log): $!";
		return '
debug log seek error
';
	}
	$log = do { local $/; <$log> } // do {
		warn "readline(log): $!";
		return '
debug log read error
';
	};
	$ctx->{-linkify} //= PublicInbox::Linkify->new;
	'
debug log:

';
}

# Header-parsing callback handed to Qspawn->psgi_return by
# stream_large_blob().  $r is the result of the last read (undef on error,
# 0 on EOF), $bref the data buffered so far.  Returns a PSGI-style
# response/header array once the content type is decided, or undef to ask
# for more data.
sub stream_blob_parse_hdr { # {parse_hdr} for Qspawn
	my ($r, $bref, $ctx) = @_;
	my ($git, $oid, $type, $size, $di) = @{$ctx->{-res}};
	my @cl = ('Content-Length', $size);
	if (!defined $r) { # sysread error
		html_page($ctx, 500, dbg_log($ctx));
	} elsif (index($$bref, "\0") >= 0) {
		# a NUL byte in the buffered data means binary
		[200, [qw(Content-Type application/octet-stream), @cl] ];
	} else {
		my $n = length($$bref);
		# enough data seen (or the whole blob) without a NUL: text
		if ($n >= $BIN_DETECT || $n == $size) {
			return [200, [ 'Content-Type',
				'text/plain; charset=UTF-8', @cl ] ];
		}
		if ($r == 0) {
			my $log = dbg_log($ctx);
			warn "premature EOF on $oid $log";
			return html_page($ctx, 500, $log);
		}
		undef; # bref keeps growing
	}
}

# Stream a blob straight from `git cat-file' instead of loading it into
# memory.  $res is the solver result array; it is stashed in $ctx->{-res}
# for stream_blob_parse_hdr().
sub stream_large_blob ($$) {
	my ($ctx, $res) = @_;
	$ctx->{-res} = $res;
	my ($git, $oid, $type, $size, $di) = @$res;
	my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid];
	my $qsp = PublicInbox::Qspawn->new($cmd);
	my $env = $ctx->{env};
	$env->{'qspawn.wcb'} = delete $ctx->{-wcb};
	$qsp->psgi_return($env, undef, \&stream_blob_parse_hdr, $ctx);
}

# psgi_qx callback for show_other(): $bref holds the `git show' output.
# Emits a 500 page if an error was recorded in $ctx->{-qsp_err},
# otherwise a 200 page with the linkified output followed by the debug log.
sub show_other_result ($$) { # tag, tree, ...
	my ($bref, $ctx) = @_;
	if (my $qsp_err = delete $ctx->{-qsp_err}) {
		return html_page($ctx, 500, dbg_log($ctx) .
				"git show error:$qsp_err");
	}
	my $l = PublicInbox::Linkify->new;
	utf8::decode($$bref);
	# NOTE(review): the two literals below look truncated -- confirm
	html_page($ctx, 200, '
', $l->to_html($$bref), '

', dbg_log($ctx));
}

# Callback for each parent commit looked up by show_commit_start().
# Extracts the first line following the first blank line of the object
# and appends it (HTML-escaped) to $ctx->{-cmt_pt}.  Once one title per
# entry in $ctx->{-cmt_P} has been collected, cmt_finalize() is called.
sub cmt_title { # git->cat_async callback
	my ($bref, $oid, $type, $size, $ctx) = @_;
	utf8::decode($$bref);
	my $title = $$bref =~ /\r?\n\r?\n([^\r\n]+)\r?\n?/ ? $1 : '';
	push(@{$ctx->{-cmt_pt}} , ascii_html($title)) == @{$ctx->{-cmt_P}} and
		cmt_finalize($ctx);
}

# psgi_qx callback for show_commit(): $bref holds the `git patch-id'
# output.  Reads the NUL-terminated header file "h" from the temporary
# directory, splits it into the fields named by $SHOW_FMT and then looks
# up every parent commit (asynchronously when 'pi-httpd.async' is set in
# the PSGI env) so cmt_title() can collect their titles.  Commits
# without parents go straight to cmt_finalize().
sub show_commit_start { # ->psgi_qx callback
	my ($bref, $ctx) = @_;
	if (my $qsp_err = delete $ctx->{-qsp_err}) {
		return html_page($ctx, 500, dbg_log($ctx) .
				"git show/patch-id error:$qsp_err");
	}
	my $patchid = (split(/ /, $$bref))[0]; # ignore commit
	$ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid;
	open my $fh, '<:utf8', "$ctx->{-tmp}/h" or
		die "open $ctx->{-tmp}/h: $!";
	# read up to and including the NUL terminator, then drop it
	chop(my $buf = do { local $/ = "\0"; <$fh> });
	chomp $buf;
	my ($P, $p);
	($P, $p, @$ctx{qw(cmt_H cmt_T cmt_s cmt_f cmt_au cmt_co cmt_b)})
		= split(/\n/, $buf, 9);
	return cmt_finalize($ctx) if !$P;
	@{$ctx->{-cmt_P}} = split(/ /, $P);
	@{$ctx->{-cmt_p}} = split(/ /, $p); # abbreviated
	if ($ctx->{env}->{'pi-httpd.async'}) {
		for (@{$ctx->{-cmt_P}}) {
			ibx_async_cat($ctx, $_, \&cmt_title, $ctx);
		}
	} else { # synchronous
		for (@{$ctx->{-cmt_P}}) {
			$ctx->{git}->cat_async($_, \&cmt_title, $ctx);
		}
		$ctx->{git}->cat_async_wait;
	}
}

# Render the commit page from the cmt_* fields gathered by
# show_commit_start()/cmt_title() plus the patch file "p" in the temporary
# directory, then pass the finished response to the 'qspawn.wcb' callback
# stored in the PSGI env.
sub cmt_finalize {
	my ($ctx) = @_;
	$ctx->{-linkify} //= PublicInbox::Linkify->new;
	my $upfx = $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
	# try to keep author and committer dates lined up
	my ($au, $co) = delete @$ctx{qw(cmt_au cmt_co)};
	my $x = length($au) - length($co);
	if ($x > 0) {
		$x = ' ' x $x;
		$co =~ s/>/>$x/;
	} elsif ($x < 0) {
		$x = ' ' x (-$x);
		$au =~ s/>/>$x/;
	}
	$_ = ascii_html($_) for ($au, $co);
	# NOTE(review): $t is computed but unused in the replacement as shown,
	# and the pattern matches a literal '>' after ascii_html() has already
	# run on $au -- both look like extraction damage; confirm against VCS.
	$au =~ s!(> +)([0-9]{4,}-\S+ \S+)!
		my ($gt, $t) = ($1, $2);
		$t =~ tr/ :-//d;
		qq($gt$2)
		!e;
	my $s = $ctx->{-linkify}->to_html(delete $ctx->{cmt_s});
	$ctx->{-title_html} = $s;
	my ($P, $p, $pt) = delete @$ctx{qw(-cmt_P -cmt_p -cmt_pt)};
	# NOTE(review): qq() below is empty as shown -- likely truncated
	$_ = qq().shift(@$p).' 
'.shift(@$pt) for @$P;
	if (@$P == 1) {
		$x = qq{ (patch)\n parent $P->[0]};
	} elsif (@$P > 1) {
		$x = qq(\n parents $P->[0]\n);
		shift @$P;
		$x .= qq( $_\n) for @$P;
		chop $x;
	} else {
		$x = ' (root commit)';
	}
	PublicInbox::WwwStream::html_init($ctx);
	# NOTE(review): the next statement does not parse as shown; it looks
	# like a heredoc (terminated by EOM) whose opener and markup were lost.
	$ctx->zmore(< commit $ctx->{cmt_H}$x tree $ctx->{cmt_T} author $au committer $co $s EOM
	$x = delete $ctx->{cmt_b};
	$ctx->zmore("\n", $ctx->{-linkify}->to_html($x)) if length($x);
	undef $x;
	open my $fh, '<:utf8', "$ctx->{-tmp}/p" or
		die "open $ctx->{-tmp}/p: $!";
	if (-s $fh > $MAX_SIZE) {
		$ctx->zmore("---\n patch is too large to show\n");
	} else { # prepare flush_diff:
		$ctx->{obuf} = \$x;
		$ctx->{-apfx} = $ctx->{-spfx} = $upfx;
		# a failed read previously left $bdy undef and was silently
		# ignored; fail loudly, consistent with the open() above
		read($fh, my $bdy, -s _) // die "read $ctx->{-tmp}/p: $!";
		$bdy =~ s/\r?\n/\n/gs;
		$ctx->{-anchors} = {} if $bdy =~ /^diff --git /sm;
		flush_diff($ctx, \$bdy); # undefs $bdy
		$ctx->zmore($x);
		undef $x;
		# TODO: should there be another textarea which attempts to
		# search for the exact email which was applied to make this
		# commit?
		if (my $qry = delete $ctx->{-qry}) {
			my $q = '';
			for (@{$qry->{dfpost}}, @{$qry->{dfpre}}) {
				# keep blobs as short as reasonable, emails
				# are going to be older than what's in git
				substr($_, 7, 64, '');
				$q .= "dfblob:$_ ";
			}
			chop $q; # no trailing SP
			local $Text::Wrap::columns = PublicInbox::View::COLS;
			local $Text::Wrap::huge = 'overflow';
			$q = wrap('', '', $q);
			my $rows = ($q =~ tr/\n/\n/) + 1;
			$q = ascii_html($q);
			# NOTE(review): truncated heredoc; does not parse as
			# shown ($rows and $q are presumably used inside it)
			$ctx->zmore(< EOM
		}
	}
	$x = $ctx->zflush($ctx->_html_end);
	my $res_hdr = delete $ctx->{-res_hdr};
	push @$res_hdr, 'Content-Length', length($x);
	delete($ctx->{env}->{'qspawn.wcb'})->([200, $res_hdr, [$x]]);
}

# Header-parsing callback handed to Qspawn->psgi_return by show_patch().
# Buffers until a blank line ("\n\n") is seen, then derives the download
# filename from the Subject header of the buffered data.  Returns undef
# while more data is needed.
sub stream_patch_parse_hdr { # {parse_hdr} for Qspawn
	my ($r, $bref, $ctx) = @_;
	if (!defined $r) { # sysread error
		html_page($ctx, 500, dbg_log($ctx));
	} elsif (index($$bref, "\n\n") >= 0) {
		my $eml = bless { hdr => $bref }, 'PublicInbox::Eml';
		my $fn = to_filename($eml->header('Subject') // '');
		$fn = substr($fn // 'PATCH-no-subject', 6); # drop "PATCH-"
		return [ 200, [ 'Content-Type',
			'text/plain; charset=UTF-8',
			'Content-Disposition',
			qq(inline; filename=$fn.patch) ] ];
	} elsif ($r == 0) {
		my $log = dbg_log($ctx);
		warn "premature EOF on $ctx->{patch_oid} $log";
		return html_page($ctx, 500, $log);
	} else {
		undef; # bref keeps growing until "\n\n"
	}
}

# Stream `git format-patch -1 --stdout -C $oid' for the commit in $res to
# the client; headers are decided by stream_patch_parse_hdr().
sub show_patch ($$) {
	my ($ctx, $res) = @_;
	my ($git, $oid) = @$res;
	my @cmd = ('git', "--git-dir=$git->{git_dir}",
		qw(format-patch -1 --stdout -C),
		"--signature=git format-patch -1 --stdout -C $oid", $oid);
	my $qsp = PublicInbox::Qspawn->new(\@cmd);
	$ctx->{env}->{'qspawn.wcb'} = delete $ctx->{-wcb};
	$ctx->{patch_oid} = $oid;
	$qsp->psgi_return($ctx->{env}, undef, \&stream_patch_parse_hdr, $ctx);
}

# Show a commit.  Requests whose filename ends in ".patch" are handed to
# show_patch(); otherwise a shell pipeline writes the header fields to "h"
# and the stat+patch to "p" inside the temporary directory, and
# show_commit_start() continues once it finishes.
sub show_commit ($$) {
	my ($ctx, $res) = @_;
	return show_patch($ctx, $res) if ($ctx->{fn} // '') =~ /\.patch\z/;
	my ($git, $oid) = @$res;
	# patch-id needs two passes, and we use the initial show to ensure
	# a patch embedded inside the commit message body doesn't get fed
	# to patch-id:
	# NOTE(review): the end of this statement is truncated as shown (the
	# last string is unterminated and $e, used below, is never declared);
	# restore from version control.
	my $cmd = [ '/bin/sh', '-c',
		"git show --encoding=UTF-8 '$SHOW_FMT'".
		" -z --no-notes --no-patch $oid >h && ".
		'git show --encoding=UTF-8 --pretty=format:%n -M'.
		" --stat -p $oid >p && ".
		"git patch-id --stable $git->{git_dir} };
	my $qsp = PublicInbox::Qspawn->new($cmd, $e, { -C => "$ctx->{-tmp}" });
	$qsp->{qsp_err} = \($ctx->{-qsp_err} = '');
	$ctx->{env}->{'qspawn.wcb'} = delete $ctx->{-wcb};
	$ctx->{git} = $git;
	$qsp->psgi_qx($ctx->{env}, undef, \&show_commit_start, $ctx);
}

# Show any object which is neither a commit nor a blob by running
# `git show' on it; objects larger than $MAX_SIZE only get a notice.
sub show_other ($$) {
	my ($ctx, $res) = @_;
	my ($git, $oid, $type, $size) = @$res;
	$size > $MAX_SIZE and return html_page($ctx, 200,
		"$oid is too big to show\n".
		dbg_log($ctx));
	my $cmd = ['git', "--git-dir=$git->{git_dir}",
		qw(show --encoding=UTF-8 --no-color --no-abbrev), $oid ];
	my $qsp = PublicInbox::Qspawn->new($cmd);
	$qsp->{qsp_err} = \($ctx->{-qsp_err} = '');
	$qsp->psgi_qx($ctx->{env}, undef, \&show_other_result, $ctx);
}

# user_cb for SolverGit, called as: user_cb->($result_or_error, $uarg)
#
# A false $res yields a 404 page, a non-array $res a 500 page.  Otherwise
# $res is [ $git, $oid, $type, $size, $di ]: commits go to show_commit(),
# other non-blobs to show_other(), and blobs are either served raw (when a
# filename was requested) or rendered as an HTML page with line numbers.
sub solve_result {
	my ($res, $ctx) = @_;
	my $hints = delete $ctx->{hints};
	$res or return html_page($ctx, 404, dbg_log($ctx));
	ref($res) eq 'ARRAY' or return html_page($ctx, 500, dbg_log($ctx));

	my ($git, $oid, $type, $size, $di) = @$res;
	return show_commit($ctx, $res) if $type eq 'commit';
	return show_other($ctx, $res) if $type ne 'blob';
	my $path = to_filename($di->{path_b} // $hints->{path_b} // 'blob');
	# NOTE(review): literal looks truncated ($path is otherwise only used
	# for highlighting below) -- confirm against VCS
	my $raw_link = "(raw)";
	if ($size > $MAX_SIZE) {
		return stream_large_blob($ctx, $res) if defined $ctx->{fn};
		# NOTE(review): truncated heredoc; does not parse as shown
		return html_page($ctx, 200, <Too big to show, download available "$oid $type $size bytes $raw_link EOM
	}

	my $blob = $git->cat_file($oid);
	if (!$blob) { # WTF?
		my $e = "Failed to retrieve generated blob ($oid)";
		warn "$e ($git->{git_dir})";
		# NOTE(review): $e is not part of the page as shown -- the
		# literal below looks truncated
		return html_page($ctx, 500, "
".dbg_log($ctx))
	}

	# same heuristic as stream_blob_parse_hdr: NUL within the first
	# $BIN_DETECT bytes means binary
	my $bin = index(substr($$blob, 0, $BIN_DETECT), "\0") >= 0;
	if (defined $ctx->{fn}) {
		my $h = [ 'Content-Length', $size, 'Content-Type' ];
		push(@$h, ($bin ? 'application/octet-stream' : 'text/plain'));
		return delete($ctx->{-wcb})->([200, $h, [ $$blob ]]);
	}

	$bin and return html_page($ctx, 200, "
$oid $type $size bytes (binary)" .
				" $raw_link
".dbg_log($ctx));

	# TODO: detect + convert to ensure validity
	utf8::decode($$blob);
	my $nl = ($$blob =~ s/\r?\n/\n/sg); # number of lines
	my $pad = length($nl); # width of the widest line number
	($ctx->{-linkify} //= PublicInbox::Linkify->new)->linkify_1($$blob);
	# was `my $ok = ... if $hl;': declaring a lexical under a statement
	# modifier has undefined behaviour (perlsyn), so spell out both arms
	my $ok = $hl ? $hl->do_hl($blob, $path) : undef;
	if ($ok) {
		$blob = $ok;
	} else {
		$$blob = ascii_html($$blob);
	}

	# NOTE(review): the literals from here to the html_page() call are
	# truncated and unbalanced as shown; reproduced verbatim, they do not
	# parse.  Restore from version control.
	my $x = "
$oid $type $size bytes $raw_link
" . "
". "
	$x .= sprintf("% ${pad}u\n", $_) for (1..$nl);
	$x .= '
'. # pad for non-CSS users
		"";
	# using some of the same CSS class names and ids as cgit
	html_page($ctx, 200, $x, $ctx->{-linkify}->linkify_2($$blob),
		''.dbg_log($ctx));
}

# GET /$INBOX/$GIT_OBJECT_ID/s/
# GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME
#
# Entry point: copies non-empty query parameters into solver hints via
# %QP_MAP, creates a temporary directory and debug log, and returns a
# coderef which stores the server's write callback in $ctx->{-wcb} and
# starts the solver; solve_result() is invoked with the outcome.
sub show ($$;$) {
	my ($ctx, $oid_b, $fn) = @_;
	my $qp = $ctx->{qp};
	my $hints = $ctx->{hints} = {};
	while (my ($from, $to) = each %QP_MAP) {
		defined(my $v = $qp->{$from}) or next;
		$hints->{$to} = $v if $v ne '';
	}

	$ctx->{fn} = $fn;
	$ctx->{-tmp} = File::Temp->newdir("solver.$oid_b-XXXX", TMPDIR => 1);
	open $ctx->{lh}, '+>>', "$ctx->{-tmp}/solve.log" or die "open: $!";
	my $solver = PublicInbox::SolverGit->new($ctx->{ibx},
						\&solve_result, $ctx);
	$solver->{tmp} = $ctx->{-tmp}; # share tmpdir
	# PSGI server will call this immediately and give us a callback (-wcb)
	sub {
		$ctx->{-wcb} = $_[0]; # HTTP write callback
		$solver->solve($ctx->{env}, $ctx->{lh}, $oid_b, $hints);
	};
}

1;