From: Eric Wong
Date: Tue, 2 Feb 2016 04:00:08 +0000 (+0000)
Subject: www: support git cloning via dumb HTTP
X-Git-Tag: v1.0.0~734
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=d39a8a440c9b5c59e1fa058467f64034f8974e0e;hp=4c808e262750b717e27e050cecaed0a44e780aa6;p=public-inbox.git

www: support git cloning via dumb HTTP

This is enabled by default, for now.  Smart HTTP cloning support
will be added later, but it will be optional since it can be highly
CPU and memory intensive.
---

diff --git a/lib/PublicInbox/GitHTTPDumb.pm b/lib/PublicInbox/GitHTTPDumb.pm
new file mode 100644
index 00000000..c088d8c4
--- /dev/null
+++ b/lib/PublicInbox/GitHTTPDumb.pm
@@ -0,0 +1,136 @@
+# Copyright (C) 2016 all contributors
+# License: AGPL-3.0+
+
+# when no endpoints match, fallback to this and serve a static file
+# This can serve Smart HTTP in the future.
+package PublicInbox::GitHTTPDumb;
+use strict;
+use warnings;
+use Fcntl qw(:seek);
+
+# n.b. serving "description" and "cloneurl" should be innocuous enough to
+# not cause problems.  serving "config" might...
+my @text = qw[HEAD info/refs
+	objects/info/(?:http-alternates|alternates|packs)
+	cloneurl description];
+
+my @binary = qw!
+	objects/[a-f0-9]{2}/[a-f0-9]{38}
+	objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx)
+	!;
+
+# regexp alternations of the patterns above; $ANY is matched by the WWW router
+our $ANY = join('|', @binary, @text);
+my $BIN = join('|', @binary);
+my $TEXT = join('|', @text);
+
+# returns an empty-bodied text/plain PSGI response with status code $_[0]
+sub r {
+	[ $_[0] , [qw(Content-Type text/plain Content-Length 0) ], [] ]
+}
+
+# Serves the file at $path (relative to $git->{git_dir}) when $path matches
+# one of the patterns above, honoring a single "bytes=" Range request.
+# Returns a PSGI array ref on error (404, 416, 500), otherwise a code ref
+# which takes the Plack responder callback and streams the file body.
+sub serve {
+	my ($cgi, $git, $path) = @_;
+	my $type;
+	if ($path =~ /\A(?:$BIN)\z/o) {
+		$type = 'application/octet-stream';
+	} elsif ($path =~ /\A(?:$TEXT)\z/o) {
+		$type = 'text/plain';
+	} else {
+		return r(404);
+	}
+	my $f = "$git->{git_dir}/$path";
+	return r(404) unless -f $f && -r _;
+	my @st = stat(_);
+	my $size = $st[7];
+
+	# TODO: If-Modified-Since and Last-Modified
+	open my $in, '<', $f or return r(404);
+	my $code = 200;
+	my $len = $size;
+	my @h;
+
+	my $env = $cgi->{env} || \%ENV;
+	my $range = $env->{HTTP_RANGE};
+	if (defined $range && $range =~ /\bbytes=(\d*)-(\d*)\z/) {
+		($code, $len) = prepare_range($cgi, $in, \@h, $1, $2, $size);
+		if ($code == 416) {
+			push @h, 'Content-Range', "bytes */$size";
+			return [ 416, \@h, [] ];
+		}
+		# seek failed: never start streaming from the wrong offset
+		return r(500) if $code == 500;
+	}
+
+	push @h, 'Content-Type', $type, 'Content-Length', $len;
+	sub {
+		my ($res) = @_; # Plack callback
+		my $fh = $res->([ $code, \@h ]);
+		my $buf;
+		my $n = 8192;
+		while ($len > 0) {
+			$n = $len if $len < $n;
+			my $r = read($in, $buf, $n);
+			last if (!defined($r) || $r <= 0);
+			$len -= $r;
+			$fh->write($buf);
+		}
+		$fh->close;
+	}
+}
+
+# Checks the ($beg, $end) captures of a "bytes=" Range header against $size.
+# For a satisfiable range, seeks $in to the first byte and appends the
+# Accept-Ranges and Content-Range headers to @$h.  Always returns a
+# two-element list ($code, $len): $code is 206 with $len bytes to send,
+# 416 for an unsatisfiable range, or 500 when seeking failed.
+sub prepare_range {
+	my ($cgi, $in, $h, $beg, $end, $size) = @_;
+	my $code = 200;
+	my $len = $size;
+	if ($beg eq '') {
+		if ($end ne '') { # "bytes=-$end" => last N bytes
+			$beg = $size - $end;
+			$beg = 0 if $beg < 0;
+			$end = $size - 1;
+			$code = 206;
+		} else {
+			$code = 416;
+		}
+	} else {
+		if ($beg > $size) {
+			$code = 416;
+		} elsif ($end eq '' || $end >= $size) {
+			$end = $size - 1;
+			$code = 206;
+		} elsif ($end < $size) {
+			$code = 206;
+		} else {
+			$code = 416;
+		}
+	}
+	if ($code == 206) {
+		$len = $end - $beg + 1;
+		if ($len <= 0) {
+			$code = 416;
+		} else {
+			# the caller unpacks a ($code, $len) list, so report
+			# failure in that shape and not as a PSGI array ref
+			seek($in, $beg, SEEK_SET) or return (500, 0);
+			push @$h, qw(Accept-Ranges bytes Content-Range);
+			push @$h, "bytes $beg-$end/$size";
+
+			# FIXME: Plack::Middleware::Deflater bug?
+			if (my $env = $cgi->{env}) {
+				$env->{'psgix.no-compress'} = 1;
+			}
+		}
+	}
+	($code, $len);
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index d5635d84..1c6936f7 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -18,6 +18,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape);
 use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
 use constant PI_URL => 'http://public-inbox.org/';
 require PublicInbox::Git;
+use PublicInbox::GitHTTPDumb;
 our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
 our $MID_RE = qr!([^/]+)!;
 our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
@@ -42,6 +43,10 @@ sub run {
 	} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
 		invalid_list($ctx, $1) || get_atom($ctx);
 
+	} elsif ($path_info =~ m!$LISTNAME_RE/
+				($PublicInbox::GitHTTPDumb::ANY)\z!ox) {
+		my $path = $2;
+		invalid_list($ctx, $1) || serve_git($cgi, $ctx->{git}, $path);
 	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
 		msg_page($ctx, $1, $2, $3);
 
@@ -395,4 +400,9 @@ sub msg_page {
 	r404($ctx);
 }
 
+sub serve_git {
+	my ($cgi, $git, $path) = @_;
+	PublicInbox::GitHTTPDumb::serve($cgi, $git, $path);
+}
+
 1;
diff --git a/public-inbox-index b/public-inbox-index
index 53449556..578d91d5 100755
--- a/public-inbox-index
+++ b/public-inbox-index
@@ -57,6 +57,9 @@ foreach my $dir (@dirs) {
 sub index_dir {
 	my ($git_dir) = @_;
 	-d $git_dir or die "$git_dir does not appear to be a git repository\n";
+
+	system('git', "--git-dir=$git_dir", 'update-server-info') and
+		die "git update-server-info failed for $git_dir";
 	my $s = PublicInbox::SearchIdx->new($git_dir, 1);
 	$s->index_sync;
 }
diff --git a/public-inbox-mda b/public-inbox-mda
index 73c4ae1c..24feeb81 100755
--- a/public-inbox-mda
+++ b/public-inbox-mda
@@ -62,7 +62,7 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
 	PublicInbox::MDA->author_info($msg);
 
 END {
-	search_index_sync($main_repo) if ($? == 0);
+	index_sync($main_repo) if ($? == 0);
 };
 
 local $ENV{GIT_AUTHOR_NAME} = $name;
@@ -98,8 +98,12 @@ sub do_spamc {
 	return ($@ || $? || !defined($$out) || $$out eq '') ? 0 : 1;
 }
 
-sub search_index_sync {
+sub index_sync {
 	my ($git_dir) = @_;
+
+	# potentially user-visible, ignore errors:
+	system('git', "--git-dir=$git_dir", 'update-server-info');
+
 	eval {
 		require PublicInbox::SearchIdx;
 		PublicInbox::SearchIdx->new($git_dir, 2)->index_sync;
diff --git a/t/cgi.t b/t/cgi.t
index 18632cee..4ce6514c 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -102,6 +102,24 @@ EOF
 	like($res->{head}, qr/Status:\s*404/i, "index returns 404");
 }
 
+# dumb HTTP support
+{
+	my $path = "/test/info/refs";
+	my $res = cgi_run($path);
+	like($res->{head}, qr/Status:\s*200/i, "info/refs readable");
+	my $orig = $res->{body};
+
+	local $ENV{HTTP_RANGE} = 'bytes=5-10';
+	$res = cgi_run($path);
+	like($res->{head}, qr/Status:\s*206/i, "info/refs partial OK");
+	is($res->{body}, substr($orig, 5, 6), 'partial body OK');
+
+	local $ENV{HTTP_RANGE} = 'bytes=5-';
+	$res = cgi_run($path);
+	like($res->{head}, qr/Status:\s*206/i, "info/refs partial past end OK");
+	is($res->{body}, substr($orig, 5), 'partial body OK past end');
+}
+
 # atom feeds
 {
 	local $ENV{HOME} = $home;