CSS classes (for user-supplied CSS)
-----------------------------------
-span.q - quoted text in email messages
-TODO: consider using highlight(1) via libhighlight-perl in Debian,
- optionally
-...
+See examples in contrib/css/ and lib/PublicInbox/WwwText.pm
+(or https://public-inbox.org/meta/_/text/color/ soon)
Default: none
+=item publicinbox.<name>.httpbackendmax
+
+If a digit, the maximum number of parallel
+L<git-http-backend(1)> processes to allow for cloning this
+particular inbox.
+
+If an alphanumeric value starting with a lowercase alphabetic
+character is specified, the inbox will use a L</NAMED LIMITER>
+which can be shared by multiple inboxes.
+
+Default: 32 (using a default limiter shared by all inboxes)
+
+=item publicinbox.<name>.coderepo
+
+The nickname of a "coderepo" section associated with the inbox.
+May be specified more than once for M:N mapping of code repos to
+inboxes. If enabled, diff hunk headers in patch emails will
+link to the line numbers of blobs.
+
+Default: none
+
+=item publicinbox.<name>.replyto
+
+May be used to control how reply instructions in the PSGI
+interface are displayed.
+
+":none=dead inbox" may be specified to denote an inactive list
+("dead inbox" may be replaced with another phrase).
+
+A list of comma-delimited email addresses may be specified.
+This can be useful for dedicated inboxes for bot emails, but
+discussion happens on a seperate mailing list/inbox.
+
+Mirrors of existing centralized mailing lists may use ":list"
+here to redirect mail only to the configured inbox address.
+The use of ":list" is discouraged for new mailing lists, as it
+leads to centralization.
+
+Default: :all
+
+=item publicinbox.css
+
+The local path name of a CSS file for the PSGI web interface.
+May contain the attributes "media", "title" and "href" which match
+the associated attributes of the HTML <style> tag.
+"href" may be specified to point to the URL of an remote CSS file
+and the path may be "/dev/null" or any empty file.
+Multiple files may be specified and will be included in the
+order specified.
+
=item publicinboxmda.spamcheck
This may be set to C<none> to disable the use of SpamAssassin
Default: 25
+=item coderepo.<name>.dir
+
+The path to a git repository for "publicinbox.<name>.coderepo"
+
+=item coderepo.<name>.cgitUrl
+
+The URL of the cgit instance associated with the coderepo.
+
+Default: none
+
=back
+=head2 NAMED LIMITER (PSGI)
+
+Named limiters are useful for preventing large inboxes from
+monopolizing (or overloading) the server. Since serving git
+clones (via L<git-http-backend(1)> can be memory-intensive for
+large inboxes, it makes sense to put large inboxes on a named
+limiter with a low max value; while smaller inboxes can use
+the default limiter.
+
+=over 8
+
+=item publicinboxlimiter.<name>.max
+
+The maximum number of parallel processes for the given limiter.
+
+=back
+
+=head3 EXAMPLE WITH NAMED LIMITERS
+
+ ; big inboxes which require lots of memory to clone:
+ [publicinbox "big1"]
+ mainrepo = /path/to/big1
+ address = big1@example.com
+ httpbackendmax = big
+ [publicinbox "big2"]
+ mainrepo = /path/to/big2
+ address = big2@example.com
+ httpbackendmax = big
+
+ ; tiny inboxes which are easily cloned:
+ [publicinbox "tiny1"]
+ mainrepo = /path/to/tiny1
+ address = tiny1@example.com
+ [publicinbox "tiny2"]
+ mainrepo = /path/to/tiny2
+ address = tiny2@example.com
+
+ [publicinboxlimiter "big"]
+ max = 4
+
+In the above example, the "big1" and "big2" are limited to four
+parallel L<git-http-backend(1)> processes between them.
+
+However, "tiny1" and "tiny2" will share the default limiter
+which means there can be 32 L<git-http-backend(1)> processes
+between them.
+
=head1 ENVIRONMENT
=over 8
may be too difficult to upgrade due to resource demands.
Only depend on Free Software packages which exist in the "main"
-section of Debian 7.0 and later. (We will bump version requirements
-as time passes, but this is current as of January 2016).
+section of Debian "stable" distribution. That is Debian 9.x
+("stretch") as of this writing, but "oldstable" (8.x, "jessie")
+remains supported for v1 repositories.
+
In general, we favor mature and well-tested old things rather than
the shiny new.
Makefile.PL
README
TODO
+contrib/css/216dark.css
+contrib/css/216light.css
+contrib/css/README
contrib/selinux/el7/publicinbox.fc
contrib/selinux/el7/publicinbox.te
examples/README
examples/apache2_perl_old.conf
examples/cgi-webrick.rb
examples/cgit-commit-filter.lua
+examples/highlight.psgi
examples/logrotate.conf
examples/public-inbox-config
examples/public-inbox-httpd.socket
lib/PublicInbox/HTTP.pm
lib/PublicInbox/HTTPD.pm
lib/PublicInbox/HTTPD/Async.pm
+lib/PublicInbox/HlMod.pm
lib/PublicInbox/Hval.pm
lib/PublicInbox/Import.pm
lib/PublicInbox/Inbox.pm
lib/PublicInbox/SearchMsg.pm
lib/PublicInbox/SearchThread.pm
lib/PublicInbox/SearchView.pm
+lib/PublicInbox/SolverGit.pm
lib/PublicInbox/Spamcheck.pm
lib/PublicInbox/Spamcheck/Spamc.pm
lib/PublicInbox/Spawn.pm
lib/PublicInbox/SpawnPP.pm
lib/PublicInbox/Unsubscribe.pm
+lib/PublicInbox/UserContent.pm
lib/PublicInbox/V2Writable.pm
lib/PublicInbox/View.pm
+lib/PublicInbox/ViewDiff.pm
+lib/PublicInbox/ViewVCS.pm
lib/PublicInbox/WWW.pm
lib/PublicInbox/WWW.pod
lib/PublicInbox/WatchMaildir.pm
lib/PublicInbox/WwwAtomStream.pm
lib/PublicInbox/WwwAttach.pm
+lib/PublicInbox/WwwHighlight.pm
lib/PublicInbox/WwwStream.pm
lib/PublicInbox/WwwText.pm
sa_config/Makefile
t/git-http-backend.t
t/git.fast-import-data
t/git.t
+t/hl_mod.t
t/html_index.t
t/httpd-corner.psgi
t/httpd-corner.t
t/nntpd.t
t/nulsubject.t
t/over.t
+t/perf-msgview.t
t/perf-nntpd.t
t/perf-threading.t
t/plack.t
t/reply.t
t/search-thr-index.t
t/search.t
+t/solve/0001-simple-mod.patch
+t/solve/0002-rename-with-modifications.patch
+t/solver_git.t
t/spamcheck_spamc.t
t/spawn.t
t/thread-cycle.t
check:: pure_all check-manifest
\$(EATMYDATA) prove -lv -j\$(N)
+lib/PublicInbox/UserContent.pm :: contrib/css/216dark.css
+ @\$(PERL) -I lib \$@ \$<
+
EOF
}
* Combined "super server" for NNTP/HTTP/POP3 to reduce memory overhead
-* Optional reply-to-nobody for dead lists.
-
* Configurable linkification for per-inbox shorthands:
"$gmane/123456" could be configured to expand to the
appropriate link pointing to the gmane.org list archives,
* linkify thread skeletons better
https://public-inbox.org/git/6E3699DEA672430CAEA6DEFEDE6918F4@PhilipOakley/
-* generate sample CSS for use with userContent.css/dillo/etc
-
* streaming Email::MIME replacement: currently we generate many
allocations/strings for headers we never look at and slurp
entire message bodies into memory.
* large mbox/Maildir/MH/NNTP spool import (see PublicInbox::Import)
-* Optionally allow indexing Xapian without positional information to
- save space (but prevents "quoted phrase" searching).
-
* Allow NNTP and more of PSGI code to work without Xapian
* Read-only WebDAV interface to the git repo so it can be mounted
--- /dev/null
+/*
+ * Dark color scheme using 216 web-safe colors, inspired
+ * somewhat by the default color scheme in mutt.
+ * It reduces eyestrain for me, and energy usage for all:
+ * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
+ */
+* { background:#000; color:#ccc }
+
+/*
+ * Underlined links add visual noise which make them hard-to-read.
+ * Use colors to make them stand out, instead.
+ */
+a { color:#69f; text-decoration:none }
+a:visited { color:#96f }
+
+/* quoted text gets a different color */
+*.q { color:#09f }
+
+/*
+ * these may be used with cgit, too
+ * (cgit uses <div>, public-inbox uses <span>)
+ */
+*.add { color:#0ff }
+*.del { color:#f0f }
+*.head { color:#fff }
+*.hunk { color:#c93 }
+
+/*
+ * highlight 3.x colors (tested 3.18)
+ * this doesn't use most of the colors available (I find too many
+ * colors overwhelming). So the #ccc default is commented out.
+ */
+.hl.num { color:#f30 } /* number */
+.hl.esc { color:#f0f } /* escape character */
+.hl.str { color:#f30 } /* string */
+.hl.ppc { color:#f0f } /* preprocessor */
+.hl.pps { color:#f30 } /* preprocessor string */
+.hl.slc { color:#09f } /* single-line comment */
+.hl.com { color:#09f }
+/* .hl.opt { color:#ccc } */
+/* .hl.ipl { color:#ccc } */
+/* .hl.lin { color:#ccc } */
+.hl.kwa { color:#ff0 }
+.hl.kwb { color:#0f0 }
+.hl.kwc { color:#ff0 }
+/* .hl.kwd { color:#ccc } */
--- /dev/null
+/*
+ * Light color scheme using 216 web-safe colors.
+ * Suitable for print, and blinding people with brightness.
+ * Haphazardly thrown together because bright colors hurt my eyes
+ */
+* { background:#fff; color:#333 }
+
+/*
+ * Underlined links add visual noise which make them hard-to-read.
+ * Use colors to make them stand out, instead.
+ */
+a { color:#00f; text-decoration:none }
+a:visited { color:#808 }
+
+/* quoted text gets a different color */
+*.q { color:#006 }
+
+/*
+ * these may be used with cgit, too
+ * (cgit uses <div>, public-inbox uses <span>)
+ */
+*.add { color:#060 }
+*.del {color:#900 }
+*.head { color:#000 }
+*.hunk { color:#960 }
--- /dev/null
+Example CSS for use with public-inbox.
+
+CSS::Minifier or CSS::Minifier::XS will be tried for minimizing
+CSS at startup if available(*).
+
+Multiple CSS files may be configured for user-selectability via
+the "title" attribute or for different media. Local CSS files
+are read into memory once at startup.
+
+If only one CSS file is given without "title", it will be inlined.
+
+Snippet from ~/.public-inbox/config, order matters to browsers.
+-----8<-----
+[publicinbox]
+ ; Depending on the browser, the first entry is the default.
+ ; So having "/dev/null" at the top means no colors by default.
+ ; Using the "title" attribute enables `View -> "Page Style"'
+ ; choices in Firefox.
+ css = /dev/null title=default
+
+ ; git-config supports backslash to continue long lines
+ ; Attributes ('media', 'title') must use single quotes(')
+ ; or no quotes at all, but not double-quotes, as git-config(1)
+ ; won't preserve them:
+ css = /path/to/public-inbox/contrib/css/216dark.css \
+ title=216dark \
+ media='screen,(prefers-color-scheme:dark)'
+
+ ; for tree haters who print web pages :P
+ css = /path/to/public-inbox/contrib/css/216light.css \
+ title=216light \
+ media='screen,print,(prefers-color-scheme:light)'
+
+ ; external CSS may be specified with href.
+ ; Using "//" (protocol-relative) URLs is allowed, as is
+ ; "https://" or "http://" for hosts which only support one protocol.
+ css = href=//example.com/fugly.css title=external
+
+
+(*) "libcss-minifier-perl" or "libcss-minifier-xs-perl"
+ on Debian-based systems
--- /dev/null
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Usage: plackup [OPTIONS] /path/to/this/file
+# A startup command for development which monitors changes:
+# plackup -I lib -o 127.0.0.1 -R lib -r examples/highlight.psgi
+use strict;
+use warnings;
+use PublicInbox::WwwHighlight;
+use Plack::Builder;
+my $hl = PublicInbox::WwwHighlight->new;
+builder { sub { $hl->call(@_) }; }
use strict;
use warnings;
use PublicInbox::WWW;
-PublicInbox::WWW->preload;
use Plack::Builder;
my $www = PublicInbox::WWW->new;
+$www->preload;
# share the public-inbox code itself:
my $src = $ENV{SRC_GIT_DIR}; # '/path/to/public-inbox.git'
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used throughout the project for reading configuration
+#
+# Note: I hate camelCase; but git-config(1) uses it, but it's better
+# than alllowercasewithoutunderscores, so use lc('configKey') where
+# applicable for readability
+
package PublicInbox::Config;
use strict;
use warnings;
require PublicInbox::Inbox;
use PublicInbox::Spawn qw(popen_rd);
+sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] }
+
# returns key-value pairs of config directives in a hash
# if keys may be multi-value, the value is an array ref containing all values
sub new {
$self->{-by_newsgroup} ||= {};
$self->{-no_obfuscate} ||= {};
$self->{-limiters} ||= {};
+ $self->{-code_repos} ||= {}; # nick => PublicInbox::Git object
if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
- $no = [ $no ] if ref($no) ne 'ARRAY';
+ $no = _array($no);
my @domains;
foreach my $n (@$no) {
my @n = split(/\s+/, $n);
my $nod = join('|', @domains);
$self->{-no_obfuscate_re} = qr/(?:$nod)\z/i;
}
+ if (my $css = delete $self->{'publicinbox.css'}) {
+ $self->{css} = _array($css);
+ }
$self;
}
sub each_inbox {
my ($self, $cb) = @_;
- my %seen;
- foreach my $k (keys %$self) {
- $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next;
- next if $seen{$1};
- $seen{$1} = 1;
- my $ibx = lookup_name($self, $1) or next;
- $cb->($ibx);
+ if (my $section_order = $self->{-section_order}) {
+ foreach my $section (@$section_order) {
+ next if $section !~ m!\Apublicinbox\.([^/]+)\z!;
+ $self->{"publicinbox.$1.mainrepo"} or next;
+ my $ibx = lookup_name($self, $1) or next;
+ $cb->($ibx);
+ }
+ } else {
+ my %seen;
+ foreach my $k (keys %$self) {
+ $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next;
+ next if $seen{$1};
+ $seen{$1} = 1;
+ my $ibx = lookup_name($self, $1) or next;
+ $cb->($ibx);
+ }
}
}
sub git_config_dump {
my ($file) = @_;
- my ($in, $out);
+ my (%section_seen, @section_order);
my @cmd = (qw/git config/, "--file=$file", '-l');
my $cmd = join(' ', @cmd);
my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n";
while (defined(my $line = <$fh>)) {
chomp $line;
my ($k, $v) = split(/=/, $line, 2);
- my $cur = $rv{$k};
+ my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/);
+ unless (defined $section_seen{$section}) {
+ $section_seen{$section} = 1;
+ push @section_order, $section;
+ }
+
+ my $cur = $rv{$k};
if (defined $cur) {
if (ref($cur) eq "ARRAY") {
push @$cur, $v;
}
}
close $fh or die "failed to close ($cmd) pipe: $?";
+ $rv{-section_order} = \@section_order;
\%rv;
}
+sub valid_inbox_name ($) {
+ my ($name) = @_;
+
+ # Similar rules found in git.git/remote.c::valid_remote_nick
+ # and git.git/refs.c::check_refname_component
+ # We don't reject /\.lock\z/, however, since we don't lock refs
+ if ($name eq '' || $name =~ /\@\{/ ||
+ $name =~ /\.\./ || $name =~ m![/:\?\[\]\^~\s\f[:cntrl:]\*]! ||
+ $name =~ /\A\./ || $name =~ /\.\z/) {
+ return 0;
+ }
+
+ # Note: we allow URL-unfriendly characters; users may configure
+ # non-HTTP-accessible inboxes
+ 1;
+}
+
+# parse a code repo
+# Only git is supported at the moment, but SVN and Hg are possibilities
+sub _fill_code_repo {
+ my ($self, $nick) = @_;
+ my $pfx = "coderepo.$nick";
+
+ my $dir = $self->{"$pfx.dir"}; # aka "GIT_DIR"
+ unless (defined $dir) {
+ warn "$pfx.repodir unset";
+ return;
+ }
+
+ my $git = PublicInbox::Git->new($dir);
+ foreach my $t (qw(blob commit tree tag)) {
+ $git->{$t.'_url_format'} =
+ _array($self->{lc("$pfx.${t}UrlFormat")});
+ }
+
+ if (my $cgits = $self->{lc("$pfx.cgitUrl")}) {
+ $git->{cgit_url} = $cgits = _array($cgits);
+
+ # cgit supports "/blob/?id=%s", but it's only a plain-text
+ # display and requires an unabbreviated id=
+ foreach my $t (qw(blob commit tag)) {
+ $git->{$t.'_url_format'} ||= map {
+ "$_/$t/?id=%s"
+ } @$cgits;
+ }
+ }
+ # TODO: support gitweb and other repository viewers?
+ # TODO: parse cgitrc
+
+ $git;
+}
+
sub _fill {
my ($self, $pfx) = @_;
my $rv = {};
}
# TODO: more arrays, we should support multi-value for
# more things to encourage decentralization
- foreach my $k (qw(address altid nntpmirror)) {
+ foreach my $k (qw(address altid nntpmirror coderepo)) {
if (defined(my $v = $self->{"$pfx.$k"})) {
- $rv->{$k} = ref($v) eq 'ARRAY' ? $v : [ $v ];
+ $rv->{$k} = _array($v);
}
}
my $name = $pfx;
$name =~ s/\Apublicinbox\.//;
- # same rules as git.git/remote.c::valid_remote_nick
- if ($name eq '' || $name =~ m!/! || $name eq '.' || $name eq '..') {
+ if (!valid_inbox_name($name)) {
warn "invalid inbox name: '$name'\n";
return;
}
$rv->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
each_inbox($self, sub {}); # noop to populate -no_obfuscate
}
+
+ if (my $ibx_code_repos = $rv->{coderepo}) {
+ my $code_repos = $self->{-code_repos};
+ my $repo_objs = $rv->{-repo_objs} = [];
+ foreach my $nick (@$ibx_code_repos) {
+ valid_inbox_name($nick) or next;
+ my $repo = $code_repos->{$nick} ||=
+ _fill_code_repo($self, $nick);
+ push @$repo_objs, $repo if $repo;
+ }
+ }
+
$rv
}
use PublicInbox::Hval;
use PublicInbox::MID qw/mid2path/;
use PublicInbox::WwwStream;
+our $MIN_PARTIAL_LEN = 16;
# TODO: user-configurable
our @EXT_URL = (
sub search_partial ($$) {
my ($srch, $mid) = @_;
+ return if length($mid) < $MIN_PARTIAL_LEN;
my $opt = { limit => PARTIAL_MAX, mset => 2 };
my @try = ("m:$mid*");
my $chop = $mid;
}
foreach my $m (@try) {
- my $mset = eval { $srch->query($m, $opt) };
- if (ref($@) eq 'Search::Xapian::QueryParserError') {
- # If Xapian can't handle the wildcard since it
- # has too many results.
- next;
- }
+ # If Xapian can't handle the wildcard since it
+ # has too many results. $@ can be
+ # Search::Xapian::QueryParserError or even:
+ # "something terrible happened at ../Search/Xapian/Enquire.pm"
+ my $mset = eval { $srch->query($m, $opt) } or next;
+
my @mids = map {
my $doc = $_->get_document;
PublicInbox::SearchMsg->load_doc($doc)->mid;
}
# can't find a partial match in current inbox, try the others:
- if (!$n_partial && length($mid) >= 16) {
+ if (!$n_partial && length($mid) >= $MIN_PARTIAL_LEN) {
foreach my $ibx (@ibx) {
$srch = $ibx->search or next;
$mids = search_partial($srch, $mid) or next;
use warnings;
sub new {
- my ($class, $rpipe, $end, $buf) = @_;
- bless { rpipe => $rpipe, end => $end, buf => $buf }, $class;
+ my ($class, $rpipe, $end, $buf, $filter) = @_;
+ bless {
+ rpipe => $rpipe,
+ end => $end,
+ buf => $buf,
+ filter => $filter || 0,
+ }, $class;
}
# close should always be called after getline returns undef,
sub getline {
my ($self) = @_;
+ my $filter = $self->{filter};
+ return if $filter == -1; # last call was EOF
+
my $buf = delete $self->{buf}; # initial buffer
- defined $buf ? $buf : $self->{rpipe}->getline;
+ $buf = $self->{rpipe}->getline unless defined $buf;
+ $self->{filter} = -1 unless defined $buf; # set EOF for next call
+ $filter ? $filter->($buf) : $buf;
}
sub close {
use POSIX qw(dup2);
require IO::Handle;
use PublicInbox::Spawn qw(spawn popen_rd);
+use base qw(Exporter);
+our @EXPORT_OK = qw(git_unquote git_quote);
+
+my %GIT_ESC = (
+ a => "\a",
+ b => "\b",
+ f => "\f",
+ n => "\n",
+ r => "\r",
+ t => "\t",
+ v => "\013",
+ '"' => '"',
+ '\\' => '\\',
+);
+my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;
+
+
+# unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
+sub git_unquote ($) {
+ return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
+ $_[0] = $1;
+ $_[0] =~ s/\\([\\"abfnrtv])/$GIT_ESC{$1}/g;
+ $_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
+ $_[0];
+}
+
+sub git_quote ($) {
+ if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
+ '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) {
+ return qq{"$_[0]"};
+ }
+ $_[0];
+}
sub new {
my ($class, $git_dir) = @_;
my @st;
$st[7] = $st[10] = 0;
- bless { git_dir => $git_dir, st => \@st }, $class
+ # may contain {-tmp} field for File::Temp::Dir
+ bless { git_dir => $git_dir, st => \@st, -git_path => {} }, $class
+}
+
+sub git_path ($$) {
+ my ($self, $path) = @_;
+ $self->{-git_path}->{$path} ||= do {
+ local $/ = "\n";
+ chomp(my $str = $self->qx(qw(rev-parse --git-path), $path));
+ $str;
+ };
}
sub alternates_changed {
my ($self) = @_;
- my $alt = "$self->{git_dir}/objects/info/alternates";
+ my $alt = git_path($self, 'objects/info/alternates');
my @st = stat($alt) or return 0;
my $old_st = $self->{st};
# 10 - ctime, 7 - size
$self->{st} = \@st;
}
+sub last_check_err {
+ my ($self) = @_;
+ my $fh = $self->{err_c} or return;
+ sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+ defined(sysread($fh, my $buf, -s $fh)) or
+ fail($self, "sysread failed: $!");
+ $buf;
+}
+
sub _bidi_pipe {
- my ($self, $batch, $in, $out, $pid) = @_;
- return if $self->{$pid};
+ my ($self, $batch, $in, $out, $pid, $err) = @_;
+ if ($self->{$pid}) {
+ if (defined $err) { # "err_c"
+ my $fh = $self->{$err};
+ sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+ truncate($fh, 0) or fail($self, "truncate failed: $!");
+ }
+ return;
+ }
my ($in_r, $in_w, $out_r, $out_w);
pipe($in_r, $in_w) or fail($self, "pipe failed: $!");
fcntl($in_w, 1031, 4096) if $batch eq '--batch-check';
}
- my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch);
+ my @cmd = (qw(git), "--git-dir=$self->{git_dir}",
+ qw(-c core.abbrev=40 cat-file), $batch);
my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) };
+ if ($err) {
+ open(my $fh, '+>', undef) or fail($self, "open.err failed: $!");
+ $self->{$err} = $fh;
+ $redir->{2} = fileno($fh);
+ }
my $p = spawn(\@cmd, undef, $redir);
defined $p or fail($self, "spawn failed: $!");
$self->{$pid} = $p;
sub check {
my ($self, $obj) = @_;
- $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c));
+ _bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
$self->{out_c}->print($obj, "\n") or fail($self, "write error: $!");
local $/ = "\n";
chomp(my $line = $self->{in_c}->getline);
my ($hex, $type, $size) = split(' ', $line);
- return if $type eq 'missing';
+
+ # Future versions of git.git may show 'ambiguous', but for now,
+ # we must handle 'dangling' below (and maybe some other oddball
+ # stuff):
+ # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
+ return if $type eq 'missing' || $type eq 'ambiguous';
+
+ if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
+ $size = $type + length("\n");
+ my $r = read($self->{in_c}, my $buf, $size);
+ defined($r) or fail($self, "read failed: $!");
+ return;
+ }
+
($hex, $type, $size);
}
sub _destroy {
- my ($self, $in, $out, $pid) = @_;
+ my ($self, $in, $out, $pid, $expire) = @_;
+ my $rfh = $self->{$in} or return;
+ if (defined $expire) {
+ # at least FreeBSD 11.2 and Linux 4.20 update mtime of the
+ # read end of a pipe when the pipe is written to; dunno
+ # about other OSes.
+ my $mtime = (stat($rfh))[9];
+ return if $mtime > $expire;
+ }
my $p = delete $self->{$pid} or return;
foreach my $f ($in, $out) {
delete $self->{$f};
<$fh>
}
+# returns true if there are pending "git cat-file" processes
sub cleanup {
- my ($self) = @_;
- _destroy($self, qw(in out pid));
- _destroy($self, qw(in_c out_c pid_c));
+ my ($self, $expire) = @_;
+ _destroy($self, qw(in out pid), $expire);
+ _destroy($self, qw(in_c out_c pid_c), $expire);
+ !!($self->{pid} || $self->{pid_c});
}
# assuming a well-maintained repo, this should be a somewhat
sub packed_bytes {
my ($self) = @_;
my $n = 0;
- foreach my $p (glob("$self->{git_dir}/objects/pack/*.pack")) {
+ my $pack_dir = git_path($self, 'objects/pack');
+ foreach my $p (glob("$pack_dir/*.pack")) {
$n += -s $p;
}
$n
sub DESTROY { cleanup(@_) }
+sub local_nick ($) {
+ my ($self) = @_;
+ my $ret = '???';
+ # don't show full FS path, basename should be OK:
+ if ($self->{git_dir} =~ m!/([^/]+)(?:/\.git)?\z!) {
+ $ret = "/path/to/$1";
+ }
+ wantarray ? ($ret) : $ret;
+}
+
+# show the blob URL for cgit/gitweb/whatever
+sub src_blob_url {
+ my ($self, $oid) = @_;
+ # blob_url_format = "https://example.com/foo.git/blob/%s"
+ if (my $bfu = $self->{blob_url_format}) {
+ return map { sprintf($_, $oid) } @$bfu if wantarray;
+ return sprintf($bfu->[0], $oid);
+ }
+ local_nick($self);
+}
+
+sub pub_urls {
+ my ($self) = @_;
+ if (my $urls = $self->{cgit_url}) {
+ return @$urls;
+ }
+ local_nick($self);
+}
+
1;
__END__
=pod
$env{$name} = $val if defined $val;
}
my $limiter = $git->{-httpbackend_limiter} || $default_limiter;
- my $git_dir = $git->{git_dir};
$env{GIT_HTTP_EXPORT_ALL} = '1';
- $env{PATH_TRANSLATED} = "$git_dir/$path";
+ $env{PATH_TRANSLATED} = "$git->{git_dir}/$path";
my $rdr = { 0 => fileno($in) };
my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
- my ($fh, $rpipe);
- my $end = sub {
- if (my $err = $qsp->finish) {
- err($env, "git http-backend ($git_dir): $err");
- }
- $fh->close if $fh; # async-only
- };
-
- # Danga::Socket users, we queue up the read_enable callback to
- # fire after pending writes are complete:
- my $buf = '';
- my $rd_hdr = sub {
- my $r = sysread($rpipe, $buf, 1024, length($buf));
- return if !defined($r) && ($!{EINTR} || $!{EAGAIN});
- return r(500, 'http-backend error') unless $r;
- $r = parse_cgi_headers(\$buf) or return; # incomplete headers
+ $qsp->psgi_return($env, $limiter, sub {
+ my ($r, $bref) = @_;
+ $r = parse_cgi_headers($bref) or return; # incomplete headers
$r->[0] == 403 ? serve_dumb($env, $git, $path) : $r;
- };
- my $res;
- my $async = $env->{'pi-httpd.async'}; # XXX unstable API
- my $cb = sub {
- my $r = $rd_hdr->() or return;
- $rd_hdr = undef;
- if (scalar(@$r) == 3) { # error:
- if ($async) {
- $async->close; # calls rpipe->close
- } else {
- $rpipe->close;
- $end->();
- }
- $res->($r);
- } elsif ($async) {
- $fh = $res->($r);
- $async->async_pass($env->{'psgix.io'}, $fh, \$buf);
- } else { # for synchronous PSGI servers
- require PublicInbox::GetlineBody;
- $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end,
- $buf);
- $res->($r);
- }
- };
- sub {
- ($res) = @_;
-
- # hopefully this doesn't break any middlewares,
- # holding the input here is a waste of FDs and memory
- $env->{'psgi.input'} = undef;
-
- $qsp->start($limiter, sub { # may run later, much later...
- ($rpipe) = @_;
- $in = undef;
- if ($async) {
- $async = $async->($rpipe, $cb, $end);
- } else { # generic PSGI
- $cb->() while $rd_hdr;
- }
- });
- };
+ });
}
sub input_to_file {
'psgi.run_once' => Plack::Util::FALSE,
'psgi.multithread' => Plack::Util::FALSE,
'psgi.multiprocess' => Plack::Util::TRUE,
+
+ # We don't use this anywhere, but we can support
+ # other PSGI apps which might use it:
'psgix.input.buffered' => Plack::Util::TRUE,
- # XXX unstable API!
+ # XXX unstable API!, only GitHTTPBackend needs
+ # this to limit git-http-backend(1) parallelism.
+ # The rest of our PSGI code is generic, relying
+ # on "pull" model using "getline" to prevent
+ # over-buffering.
'pi-httpd.async' => do {
no warnings 'once';
*pi_httpd_async
sub new {
my ($class, $io, $cb, $cleanup) = @_;
+
+ # no $io? call $cb at the top of the next event loop to
+ # avoid recursion:
+ unless (defined($io)) {
+ PublicInbox::EvCleanup::asap($cb) if $cb;
+ PublicInbox::EvCleanup::next_tick($cleanup) if $cleanup;
+ return;
+ }
+
my $self = fields::new($class);
IO::Handle::blocking($io, 0);
$self->SUPER::new($io);
$self;
}
+# fires after pending writes are complete:
sub restart_read_cb ($) {
my ($self) = @_;
sub { $self->watch_read(1) }
my $r = sysread($self->{sock}, $$bref, 8192);
if ($r) {
$fh->write($$bref);
- return if $http->{closed};
- if ($http->{write_buf_size}) {
- $self->watch_read(0);
- $http->write(restart_read_cb($self));
+ unless ($http->{closed}) { # Danga::Socket sets this
+ if ($http->{write_buf_size}) {
+ $self->watch_read(0);
+ $http->write(restart_read_cb($self));
+ }
+ # stay in watch_read, but let other clients
+ # get some work done, too.
+ return;
}
- # stay in watch_read, but let other clients
- # get some work done, too.
- return;
+ # fall through to close below...
} elsif (!defined $r) {
return if $!{EAGAIN} || $!{EINTR};
}
sub event_read { $_[0]->{cb}->(@_) }
sub event_hup { $_[0]->{cb}->(@_) }
sub event_err { $_[0]->{cb}->(@_) }
-sub sysread { shift->{sock}->sysread(@_) }
sub close {
my $self = shift;
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# I have no idea how stable or safe this is for handling untrusted
+# input, but it seems to have been around for a while, and the
+# highlight(1) executable is supported by gitweb and cgit.
+#
+# I'm also unsure about API stability, but highlight 3.x seems to
+# have been around a few years and ikiwiki (apparently the only
+# user of the SWIG/Perl bindings, at least in Debian) hasn't needed
+# major changes to support it in recent years.
+#
+# Some code stolen from ikiwiki (GPL-2.0+)
+# wrapper for SWIG-generated highlight.pm bindings
+package PublicInbox::HlMod;
+use strict;
+use warnings;
+use highlight; # SWIG-generated stuff
+
+sub _parse_filetypes ($) {
+ my $ft_conf = $_[0]->searchFile('filetypes.conf') or
+ die 'filetypes.conf not found by highlight';
+ open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!";
+ local $/;
+ my $cfg = <$fh>;
+ my %ext2lang;
+ my @shebang; # order matters
+
+ # Hrm... why isn't this exposed by the highlight API?
+ # highlight >= 3.2 format (bind-style) (from ikiwiki)
+ while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
+ Extensions\s*=\s*{([^}]+)}/sgx) {
+ my $lang = $1;
+ foreach my $bit (split(/,/, $2)) {
+ $bit =~ s/.*"(.*)".*/$1/s;
+ $ext2lang{$bit} = $lang;
+ }
+ }
+ # AFAIK, all the regexps used by in filetypes.conf distributed
+ # by highlight work as Perl REs
+ while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
+ Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) {
+ my ($lang, $re) = ($1, $2);
+ eval {
+ my $perl_re = qr/$re/;
+ push @shebang, [ $lang, $perl_re ];
+ };
+ if ($@) {
+ warn "$lang shebang=[[$re]] did not work in Perl: $@";
+ }
+ }
+ (\%ext2lang, \@shebang);
+}
+
+sub new {
+ my ($class) = @_;
+ my $dir = highlight::DataDir->new;
+ $dir->initSearchDirectories('');
+ my ($ext2lang, $shebang) = _parse_filetypes($dir);
+ bless {
+ -dir => $dir,
+ -ext2lang => $ext2lang,
+ -shebang => $shebang,
+ }, $class;
+}
+
+sub _shebang2lang ($$) {
+ my ($self, $str) = @_;
+ my $shebang = $self->{-shebang};
+ foreach my $s (@$shebang) {
+ return $s->[0] if $$str =~ $s->[1];
+ }
+ undef;
+}
+
+sub _path2lang ($$) {
+ my ($self, $path) = @_;
+ my ($ext) = ($path =~ m!([^\\/\.]+)\z!);
+ $ext = lc($ext);
+ $self->{-ext2lang}->{$ext} || $ext;
+}
+
+sub do_hl {
+ my ($self, $str, $path) = @_;
+ my $lang = _path2lang($self, $path) if defined $path;
+ my $dir = $self->{-dir};
+ my $langpath;
+ if (defined $lang) {
+ $langpath = $dir->getLangPath("$lang.lang") or return;
+ $langpath = undef unless -f $langpath;
+ }
+ unless (defined $langpath) {
+ $lang = _shebang2lang($self, $str) or return;
+ $langpath = $dir->getLangPath("$lang.lang") or return;
+ $langpath = undef unless -f $langpath;
+ }
+ return unless defined $langpath;
+
+ my $gen = $self->{$langpath} ||= do {
+ my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
+ $g->setFragmentCode(1); # generate html fragment
+
+ # whatever theme works
+ my $themepath = $dir->getThemePath('print.theme');
+ $g->initTheme($themepath);
+ $g->loadLanguage($langpath);
+ $g->setEncoding('utf-8');
+ $g;
+ };
+ \($gen->generateString($$str))
+}
+
+# SWIG instances aren't reference-counted, but $self is;
+# so we need to delete all the CodeGenerator instances manually
+# at our own destruction
+sub DESTROY {
+ my ($self) = @_;
+ foreach my $gen (values %$self) {
+ if (ref($gen) eq 'highlight::CodeGenerator') {
+ highlight::CodeGenerator::deleteInstance($gen);
+ }
+ }
+}
+
+1;
use Encode qw(find_encoding);
use PublicInbox::MID qw/mid_clean mid_escape/;
use base qw/Exporter/;
-our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename/;
-
-# User-generated content (UGC) may have excessively long lines
-# and screw up rendering on some browsers, so we use pre-wrap.
-#
-# We also force everything to the same scaled font-size because GUI
-# browsers (tested both Firefox and surf (webkit)) uses a larger font
-# for the Search <form> element than the rest of the page. Font size
-# uniformity is important to people who rely on gigantic fonts.
-use constant STYLE =>
- '<style>pre{white-space:pre-wrap}*{font-size:100%}</style>';
+our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape/;
my $enc_ascii = find_encoding('us-ascii');
$class->new($raw);
}
+# some of these overrides are standard C escapes so they're
+# easy-to-understand when rendered.
+my %escape_sequence = (
+ "\x00" => '\\0', # NUL
+ "\x07" => '\\a', # bell
+ "\x08" => '\\b', # backspace
+ "\x09" => "\t", # obvious to show as-is
+ "\x0a" => "\n", # obvious to show as-is
+ "\x0b" => '\\v', # vertical tab
+ "\x0c" => '\\f', # form feed
+ "\x0d" => '\\r', # carriage ret (not preceding \n)
+ "\x1b" => '^[', # ASCII escape (mutt seems to escape this way)
+ "\x7f" => '\\x7f', # DEL
+);
+
my %xhtml_map = (
'"' => '"',
'&' => '&',
);
$xhtml_map{chr($_)} = sprintf('\\x%02x', $_) for (0..31);
-# some of these overrides are standard C escapes so they're
-# easy-to-understand when rendered.
-$xhtml_map{"\x00"} = '\\0'; # NUL
-$xhtml_map{"\x07"} = '\\a'; # bell
-$xhtml_map{"\x08"} = '\\b'; # backspace
-$xhtml_map{"\x09"} = "\t"; # obvious to show as-is
-$xhtml_map{"\x0a"} = "\n"; # obvious to show as-is
-$xhtml_map{"\x0b"} = '\\v'; # vertical tab
-$xhtml_map{"\x0c"} = '\\f'; # form feed
-$xhtml_map{"\x0d"} = '\\r'; # carriage ret (not preceding \n)
-$xhtml_map{"\x1b"} = '^['; # ASCII escape (mutt seems to escape this way)
-$xhtml_map{"\x7f"} = '\\x7f'; # DEL
+%xhtml_map = (%xhtml_map, %escape_sequence);
+
+sub src_escape ($) {
+ $_[0] =~ s/\r\n/\n/sg;
+ $_[0] =~ s/([\x7f\x00-\x1f])/$xhtml_map{$1}/sge;
+ $_[0] = $enc_ascii->encode($_[0], Encode::HTMLCREF);
+}
sub ascii_html {
my ($s) = @_;
my $CLEANUP = {}; # string(inbox) -> inbox
sub cleanup_task () {
$cleanup_timer = undef;
+ my $next = {};
for my $ibx (values %$CLEANUP) {
- foreach my $f (qw(git mm search)) {
+ my $again;
+ foreach my $f (qw(mm search)) {
delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1;
}
+ my $expire = time - 60;
+ if (my $git = $ibx->{git}) {
+ $again = $git->cleanup($expire);
+ }
+ if (my $gits = $ibx->{-repo_objs}) {
+ foreach my $git (@$gits) {
+ $again = 1 if $git->cleanup($expire);
+ }
+ }
+ $again ||= !!($ibx->{mm} || $ibx->{search});
+ $next->{"$ibx"} = $ibx if $again;
}
- $CLEANUP = {};
+ $CLEANUP = $next;
}
sub _cleanup_later ($) {
sub base_url {
my ($self, $env) = @_;
- if ($env) { # PSGI env
- my $scheme = $env->{'psgi.url_scheme'};
+ my $scheme;
+ if ($env && ($scheme = $env->{'psgi.url_scheme'})) { # PSGI env
my $host_port = $env->{HTTP_HOST} ||
"$env->{SERVER_NAME}:$env->{SERVER_PORT}";
my $url = "$scheme://$host_port". ($env->{SCRIPT_NAME} || '/');
mids references/;
use URI::Escape qw(uri_escape_utf8);
use Digest::SHA qw/sha1_hex/;
+require PublicInbox::Address;
use constant {
MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this
MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
push(@mids, ($v =~ /<([^>]+)>/sg));
}
}
- uniq_mids(\@mids);
+
+ # old versions of git-send-email would prompt users for
+ # In-Reply-To and users' muscle memory would use 'y' or 'n'
+ # as responses:
+ my %addr = ( y => 1, n => 1 );
+
+ foreach my $f (qw(To From Cc)) {
+ my @v = $hdr->header_raw($f);
+ foreach my $v (@v) {
+ $addr{$_} = 1 for (PublicInbox::Address::emails($v));
+ }
+ }
+ uniq_mids(\@mids, \%addr);
}
-sub uniq_mids ($) {
- my ($mids) = @_;
+sub uniq_mids ($;$) {
+ my ($mids, $seen) = @_;
my @ret;
- my %seen;
+ $seen ||= {};
foreach my $mid (@$mids) {
$mid =~ tr/\n\t\r//d;
if (length($mid) > MAX_MID_SIZE) {
warn "Message-ID: <$mid> too long, truncating\n";
$mid = substr($mid, 0, MAX_MID_SIZE);
}
- next if $seen{$mid};
+ next if $seen->{$mid};
push @ret, $mid;
- $seen{$mid} = 1;
+ $seen->{$mid} = 1;
}
\@ret;
}
sub _header ($) {
my $hdr = $_[0]->header_obj->as_string;
utf8::encode($hdr);
+ $hdr =~ s/(?<!\r)\n/\r\n/sg;
$hdr
}
-# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# Limits the number of processes spawned
+# Like most Perl modules in public-inbox, this is internal and
+# NOT subject to any stability guarantees! It is only documented
+# for other hackers.
+#
+# This is used to limit the number of processes spawned by the
+# PSGI server, so it acts like a semaphore and queues up extra
+# commands to be run if currently at the limit. Multiple "limiters"
+# may be configured which give inboxes different channels to
+# operate in. This can be useful to ensure smaller inboxes can
+# be cloned while cloning of large inboxes is maxed out.
+#
# This does not depend on Danga::Socket or any other external
-# scheduling mechanism, you just need to call start and finish
-# appropriately
+# scheduling mechanism, you just need to call start() and finish()
+# appropriately. However, public-inbox-httpd (which uses Danga::Socket)
+# will be able to schedule this based on readability of stdout from
+# the spawned process. See GitHTTPBackend.pm and SolverGit.pm for
+# usage examples. It does not depend on any form of threading.
+#
+# This is useful for scheduling CGI execution of both long-lived
+# git-http-backend(1) process (for "git clone") as well as short-lived
+# processes such as git-apply(1).
+
package PublicInbox::Qspawn;
use strict;
use warnings;
use PublicInbox::Spawn qw(popen_rd);
+require Plack::Util;
+
+my $def_limiter;
+# declares a command to spawn (but does not spawn it).
+# $cmd is the command to spawn
+# $env is the environ for the child process
+# $opt can include redirects and perhaps other process spawning options
sub new ($$$;) {
my ($class, $cmd, $env, $opt) = @_;
bless { args => [ $cmd, $env, $opt ] }, $class;
$cb->($self->{rpipe});
}
+sub child_err ($) {
+ my ($child_error) = @_; # typically $?
+ my $exitstatus = ($child_error >> 8) or return;
+ my $sig = $child_error & 127;
+ my $msg = "exit status=$exitstatus";
+ $msg .= " signal=$sig" if $sig;
+ $msg;
+}
+
sub finish ($) {
my ($self) = @_;
my $limiter = $self->{limiter};
my $running;
if (delete $self->{rpipe}) {
my $pid = delete $self->{pid};
- $self->{err} = $pid == waitpid($pid, 0) ? $? :
+ $self->{err} = $pid == waitpid($pid, 0) ? child_err($?) :
"PID:$pid still running?";
$running = --$limiter->{running};
}
}
}
+sub _psgi_finish ($$) {
+ my ($self, $env) = @_;
+ my $err = $self->finish;
+ if ($err && !$env->{'qspawn.quiet'}) {
+ $err = join(' ', @{$self->{args}->[0]}).": $err\n";
+ $env->{'psgi.errors'}->print($err);
+ }
+}
+
+# Similar to `backtick` or "qx" ("perldoc -f qx"), it calls $qx_cb with
+# the stdout of the given command when done; but respects the given limiter
+# $env is the PSGI env. As with ``/qx; only use this when output is small
+# and safe to slurp.
+sub psgi_qx {
+ my ($self, $env, $limiter, $qx_cb) = @_;
+ my $qx = PublicInbox::Qspawn::Qx->new;
+ my $end = sub {
+ _psgi_finish($self, $env);
+ eval { $qx_cb->($qx) };
+ $qx = undef;
+ };
+ my $rpipe;
+ my $async = $env->{'pi-httpd.async'};
+ my $cb = sub {
+ my $r = sysread($rpipe, my $buf, 8192);
+ if ($async) {
+ $async->async_pass($env->{'psgix.io'}, $qx, \$buf);
+ } elsif (defined $r) {
+ $r ? $qx->write($buf) : $end->();
+ } else {
+ return if $!{EAGAIN} || $!{EINTR}; # loop again
+ $end->();
+ }
+ };
+ $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
+ $self->start($limiter, sub { # may run later, much later...
+ ($rpipe) = @_;
+ if ($async) {
+ # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end)
+ $async = $async->($rpipe, $cb, $end);
+ } else { # generic PSGI
+ $cb->() while $qx;
+ }
+ });
+}
+
+# create a filter for "push"-based streaming PSGI writes used by HTTPD::Async
+sub filter_fh ($$) {
+ my ($fh, $filter) = @_;
+ Plack::Util::inline_object(
+ close => sub {
+ $fh->write($filter->(undef));
+ $fh->close;
+ },
+ write => sub {
+ $fh->write($filter->($_[0]));
+ });
+}
+
+# Used for streaming the stdout of one process as a PSGI response.
+#
+# $env is the PSGI env.
+# optional keys in $env:
+# $env->{'qspawn.wcb'} - the write callback from the PSGI server
+# optional, use this if you've already
+# captured it elsewhere. If not given,
+# psgi_return will return an anonymous
+# sub for the PSGI server to call
+#
+# $env->{'qspawn.filter'} - filter callback, receives a string as input,
+# undef on EOF
+#
+# $limiter - the Limiter object to use (uses the def_limiter if not given)
+#
+# $parse_hdr - Initial read function; often for parsing CGI header output.
+# It will be given the return value of sysread from the pipe
+# and a string ref of the current buffer. Returns an arrayref
+# for PSGI responses. 2-element arrays in PSGI mean the
+# body will be streamed, later, via writes (push-based) to
+# psgix.io. 3-element arrays means the body is available
+# immediately (or streamed via ->getline (pull-based)).
+sub psgi_return {
+ my ($self, $env, $limiter, $parse_hdr) = @_;
+ my ($fh, $rpipe);
+ my $end = sub {
+ _psgi_finish($self, $env);
+ $fh->close if $fh; # async-only
+ };
+
+ my $buf = '';
+ my $rd_hdr = sub {
+ my $r = sysread($rpipe, $buf, 1024, length($buf));
+ return if !defined($r) && ($!{EINTR} || $!{EAGAIN});
+ $parse_hdr->($r, \$buf);
+ };
+
+ my $wcb = delete $env->{'qspawn.wcb'};
+ my $async = $env->{'pi-httpd.async'};
+
+ my $cb = sub {
+ my $r = $rd_hdr->() or return;
+ $rd_hdr = undef;
+ my $filter = delete $env->{'qspawn.filter'};
+ if (scalar(@$r) == 3) { # error
+ if ($async) {
+ $async->close; # calls rpipe->close and $end
+ } else {
+ $rpipe->close;
+ $end->();
+ }
+ $wcb->($r);
+ } elsif ($async) {
+ $fh = $wcb->($r); # scalar @$r == 2
+ $fh = filter_fh($fh, $filter) if $filter;
+ $async->async_pass($env->{'psgix.io'}, $fh, \$buf);
+ } else { # for synchronous PSGI servers
+ require PublicInbox::GetlineBody;
+ $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end,
+ $buf, $filter);
+ $wcb->($r);
+ }
+ };
+ $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
+ my $start_cb = sub { # may run later, much later...
+ ($rpipe) = @_;
+ if ($async) {
+ # PublicInbox::HTTPD::Async->new($rpipe, $cb, $end)
+ $async = $async->($rpipe, $cb, $end);
+ } else { # generic PSGI
+ $cb->() while $rd_hdr;
+ }
+ };
+
+ # the caller already captured the PSGI write callback from
+ # the PSGI server, so we can call ->start, here:
+ return $self->start($limiter, $start_cb) if $wcb;
+
+ # the caller will return this sub to the PSGI server, so
+ # it can set the response callback (that is, for PublicInbox::HTTP,
+ # the chunked_wcb or identity_wcb callback), but other HTTP servers
+ # are supported:
+ sub {
+ ($wcb) = @_;
+ $self->start($limiter, $start_cb);
+ };
+}
+
package PublicInbox::Qspawn::Limiter;
use strict;
use warnings;
}, $class;
}
+# captures everything into a buffer and executes a callback when done
+package PublicInbox::Qspawn::Qx;
+use strict;
+use warnings;
+
+sub new {
+ my ($class) = @_;
+ my $buf = '';
+ bless \$buf, $class;
+}
+
+# called by PublicInbox::HTTPD::Async ($fh->write)
+sub write {
+ ${$_[0]} .= $_[1];
+ undef;
+}
+
1;
use POSIX qw(strftime);
use PublicInbox::OverIdx;
use PublicInbox::Spawn qw(spawn);
-require PublicInbox::Git;
+use PublicInbox::Git qw(git_unquote);
use Compress::Zlib qw(compress);
use constant {
my $xapianlevels = qr/\A(?:full|medium)\z/;
-my %GIT_ESC = (
- a => "\a",
- b => "\b",
- f => "\f",
- n => "\n",
- r => "\r",
- t => "\t",
- v => "\013",
-);
-
-sub git_unquote ($) {
- my ($s) = @_;
- return $s unless ($s =~ /\A"(.*)"\z/);
- $s = $1;
- $s =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
- $s =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
- $s;
-}
-
sub new {
my ($class, $ibx, $creat, $part) = @_;
my $levels = qr/\A(?:full|medium|basic)\z/;
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "Solve" blobs which don't exist in git code repositories by
+# searching inboxes for post-image blobs.
+
+# this emits a lot of debugging/tracing information which may be
+# publically viewed over HTTP(S). Be careful not to expose
+# local filesystem layouts in the process.
+package PublicInbox::SolverGit;
+use strict;
+use warnings;
+use File::Temp qw();
+use Fcntl qw(SEEK_SET);
+use PublicInbox::Git qw(git_unquote git_quote);
+use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::MsgIter qw(msg_iter msg_part_text);
+use PublicInbox::Qspawn;
+use URI::Escape qw(uri_escape_utf8);
+
+# POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to
+# subtract 2048 bytes. We also don't factor in environment variable
+# headroom into this.
+use POSIX qw(sysconf _SC_ARG_MAX);
+my $ARG_SIZE_MAX = (sysconf(_SC_ARG_MAX) || 4096) - 2048;
+my $OID_MIN = 7;
+
+# By default, "git format-patch" generates filenames with a four-digit
+# prefix, so that means 9999 patch series are OK, right? :>
+# Maybe we can make this configurable, main concern is disk space overhead
+# for uncompressed patch fragments. Aside from space, public-inbox-httpd
+# is otherwise unaffected by having many patches, here, as it can share
+# work fairly. Other PSGI servers may have trouble, though.
+my $MAX_PATCH = 9999;
+
+# di = diff info / a hashref with information about a diff ($di):
+# {
+# oid_a => abbreviated pre-image oid,
+# oid_b => abbreviated post-image oid,
+# tmp => anonymous file handle with the diff,
+# hdr_lines => arrayref of various header lines for mode information
+# mode_a => original mode of oid_a (string, not integer),
+# ibx => PublicInbox::Inbox object containing the diff
+# smsg => PublicInbox::SearchMsg object containing diff
+# path_a => pre-image path
+# path_b => post-image path
+# }
+
+# don't bother if somebody sends us a patch with these path components,
+# it's junk at best, an attack attempt at worse:
+my %bad_component = map { $_ => 1 } ('', '.', '..');
+
+sub dbg ($$) {
+ print { $_[0]->{out} } $_[1], "\n" or ERR($_[0], "print(dbg): $!");
+}
+
+sub ERR ($$) {
+ my ($self, $err) = @_;
+ print { $self->{out} } $err, "\n";
+ my $ucb = delete($self->{user_cb});
+ eval { $ucb->($err) } if $ucb;
+ die $err;
+}
+
+# look for existing blobs already in git repos
+sub solve_existing ($$) {
+ my ($self, $want) = @_;
+ my $oid_b = $want->{oid_b};
+ my @ambiguous; # Array of [ git, $oids]
+ foreach my $git (@{$self->{gits}}) {
+ my ($oid_full, $type, $size) = $git->check($oid_b);
+ if (defined($type) && $type eq 'blob') {
+ return [ $git, $oid_full, $type, int($size) ];
+ }
+
+ next if length($oid_b) == 40;
+
+ # parse stderr of "git cat-file --batch-check"
+ my $err = $git->last_check_err;
+ my (@oids) = ($err =~ /\b([a-f0-9]{40})\s+blob\b/g);
+ next unless scalar(@oids);
+
+ # TODO: do something with the ambiguous array?
+ # push @ambiguous, [ $git, @oids ];
+
+ dbg($self, "`$oid_b' ambiguous in " .
+ join("\n\t", $git->pub_urls) . "\n" .
+ join('', map { "$_ blob\n" } @oids));
+ }
+ scalar(@ambiguous) ? \@ambiguous : undef;
+}
+
+sub extract_diff ($$$$$) {
+ my ($self, $p, $re, $ibx, $smsg) = @_;
+ my ($part) = @$p; # ignore $depth and @idx;
+ my $hdr_lines; # diff --git a/... b/...
+ my $tmp;
+ my $ct = $part->content_type || 'text/plain';
+ my ($s, undef) = msg_part_text($part, $ct);
+ defined $s or return;
+ my $di = {};
+
+ # Email::MIME::Encodings forces QP to be CRLF upon decoding,
+ # change it back to LF:
+ my $cte = $part->header('Content-Transfer-Encoding') || '';
+ if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
+ $s =~ s/\r\n/\n/sg;
+ }
+
+ foreach my $l (split(/^/m, $s)) {
+ if ($l =~ $re) {
+ $di->{oid_a} = $1;
+ $di->{oid_b} = $2;
+ if (defined($3)) {
+ my $mode_a = $3;
+ if ($mode_a =~ /\A(?:100644|120000|100755)\z/) {
+ $di->{mode_a} = $mode_a;
+ }
+ }
+
+
+ # start writing the diff out to a tempfile
+ my $pn = ++$self->{tot};
+ open($tmp, '>', $self->{tmp}->dirname . "/$pn") or
+ die "open(tmp): $!";
+
+ push @$hdr_lines, $l;
+ $di->{hdr_lines} = $hdr_lines;
+ utf8::encode($_) for @$hdr_lines;
+ print $tmp @$hdr_lines or die "print(tmp): $!";
+
+ # for debugging/diagnostics:
+ $di->{ibx} = $ibx;
+ $di->{smsg} = $smsg;
+ } elsif ($l =~ m!\Adiff --git ("?[^/]+/.+) ("?[^/]+/.+)$!) {
+ last if $tmp; # got our blob, done!
+
+ my ($path_a, $path_b) = ($1, $2);
+
+ # diff header lines won't have \r because git
+ # will quote them, but Email::MIME gives CRLF
+ # for quoted-printable:
+ $path_b =~ tr/\r//d;
+
+ # don't care for leading 'a/' and 'b/'
+ my (undef, @a) = split(m{/}, git_unquote($path_a));
+ my (undef, @b) = split(m{/}, git_unquote($path_b));
+
+ # get rid of path-traversal attempts and junk patches:
+ foreach (@a, @b) {
+ return if $bad_component{$_};
+ }
+
+ $di->{path_a} = join('/', @a);
+ $di->{path_b} = join('/', @b);
+ $hdr_lines = [ $l ];
+ } elsif ($tmp) {
+ utf8::encode($l);
+ print $tmp $l or die "print(tmp): $!";
+ } elsif ($hdr_lines) {
+ push @$hdr_lines, $l;
+ if ($l =~ /\Anew file mode (100644|120000|100755)$/) {
+ $di->{mode_a} = $1;
+ }
+ }
+ }
+ return undef unless $tmp;
+ close $tmp or die "close(tmp): $!";
+ $di;
+}
+
+sub path_searchable ($) { defined($_[0]) && $_[0] =~ m!\A[\w/\. \-]+\z! }
+
+# ".." appears in path names, which confuses Xapian into treating
+# it as a range query. So we split on ".." since Xapian breaks
+# on punctuation anyways:
+sub filename_query ($) {
+ join('', map { qq( dfn:"$_") } split(/\.\./, $_[0]));
+}
+
+sub find_extract_diff ($$$) {
+ my ($self, $ibx, $want) = @_;
+ my $srch = $ibx->search or return;
+
+ my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
+ $post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";
+
+ my $q = "dfpost:$post";
+ my $pre = $want->{oid_a};
+ if (defined $pre && $pre =~ /\A[a-f0-9]+\z/) {
+ $q .= " dfpre:$pre";
+ } else {
+ $pre = '[a-f0-9]{7}'; # for $re below
+ }
+
+ my $path_b = $want->{path_b};
+ if (path_searchable($path_b)) {
+ $q .= filename_query($path_b);
+
+ my $path_a = $want->{path_a};
+ if (path_searchable($path_a) && $path_a ne $path_b) {
+ $q .= filename_query($path_a);
+ }
+ }
+
+ my $msgs = $srch->query($q, { relevance => 1 });
+ my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: (\d+))?/;
+
+ my $di;
+ foreach my $smsg (@$msgs) {
+ $ibx->smsg_mime($smsg) or next;
+ msg_iter(delete($smsg->{mime}), sub {
+ $di ||= extract_diff($self, $_[0], $re, $ibx, $smsg);
+ });
+ return $di if $di;
+ }
+}
+
+sub prepare_index ($) {
+ my ($self) = @_;
+ my $patches = $self->{patches};
+ $self->{nr} = 0;
+
+ my $di = $patches->[0] or die 'no patches';
+ my $oid_a = $di->{oid_a} or die '{oid_a} unset';
+ my $existing = $self->{found}->{$oid_a};
+
+ # no index creation for added files
+ $oid_a =~ /\A0+\z/ and return next_step($self);
+
+ die "BUG: $oid_a not not found" unless $existing;
+
+ my $oid_full = $existing->[1];
+ my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
+ my $mode_a = $di->{mode_a} || extract_old_mode($di);
+
+ open my $in, '+>', undef or die "open: $!";
+ print $in "$mode_a $oid_full\t$path_a\0" or die "print: $!";
+ $in->flush or die "flush: $!";
+ sysseek($in, 0, 0) or die "seek: $!";
+
+ dbg($self, 'preparing index');
+ my $rdr = { 0 => fileno($in) };
+ my $cmd = [ qw(git update-index -z --index-info) ];
+ my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
+ $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+ my ($bref) = @_;
+ if (my $err = $qsp->{err}) {
+ ERR($self, "git update-index error: $err");
+ }
+ dbg($self, "index prepared:\n" .
+ "$mode_a $oid_full\t" . git_quote($path_a));
+ next_step($self); # onto do_git_apply
+ });
+}
+
+# pure Perl "git init"
+sub do_git_init ($) {
+ my ($self) = @_;
+ my $dir = $self->{tmp}->dirname;
+ my $git_dir = "$dir/git";
+
+ foreach ('', qw(objects refs objects/info refs/heads)) {
+ mkdir("$git_dir/$_") or die "mkdir $_: $!";
+ }
+ open my $fh, '>', "$git_dir/config" or die "open git/config: $!";
+ print $fh <<'EOF' or die "print git/config $!";
+[core]
+ repositoryFormatVersion = 0
+ filemode = true
+ bare = false
+ fsyncObjectfiles = false
+ logAllRefUpdates = false
+EOF
+ close $fh or die "close git/config: $!";
+
+ open $fh, '>', "$git_dir/HEAD" or die "open git/HEAD: $!";
+ print $fh "ref: refs/heads/master\n" or die "print git/HEAD: $!";
+ close $fh or die "close git/HEAD: $!";
+
+ my $f = 'objects/info/alternates';
+ open $fh, '>', "$git_dir/$f" or die "open: $f: $!";
+ foreach my $git (@{$self->{gits}}) {
+ print $fh $git->git_path('objects'),"\n" or die "print $f: $!";
+ }
+ close $fh or die "close: $f: $!";
+ my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir);
+ $tmp_git->{-tmp} = $self->{tmp};
+ $self->{git_env} = {
+ GIT_DIR => $git_dir,
+ GIT_INDEX_FILE => "$git_dir/index",
+ };
+ prepare_index($self);
+}
+
+sub extract_old_mode ($) {
+ my ($di) = @_;
+ if (join('', @{$di->{hdr_lines}}) =~
+ /^old mode (100644|100755|120000)\b/) {
+ return $1;
+ }
+ '100644';
+}
+
+sub do_finish ($$) {
+ my ($self, $user_cb) = @_;
+ my $found = $self->{found};
+ my $oid_want = $self->{oid_want};
+ if (my $exists = $found->{$oid_want}) {
+ return $user_cb->($exists);
+ }
+
+ # let git disambiguate if oid_want was too short,
+ # but long enough to be unambiguous:
+ my $tmp_git = $self->{tmp_git};
+ if (my @res = $tmp_git->check($oid_want)) {
+ return $user_cb->($found->{$res[0]});
+ }
+ if (my $err = $tmp_git->last_check_err) {
+ dbg($self, $err);
+ }
+ $user_cb->(undef);
+}
+
+sub do_step ($) {
+ my ($self) = @_;
+ eval {
+ # step 1: resolve blobs to patches in the todo queue
+ if (my $want = pop @{$self->{todo}}) {
+ # this populates {patches} and {todo}
+ resolve_patch($self, $want);
+
+ # step 2: then we instantiate a working tree once
+ # the todo queue is finally empty:
+ } elsif (!defined($self->{tmp_git})) {
+ do_git_init($self);
+
+ # step 3: apply each patch in the stack
+ } elsif (scalar @{$self->{patches}}) {
+ do_git_apply($self);
+
+ # step 4: execute the user-supplied callback with
+ # our result: (which may be undef)
+ # Other steps may call user_cb to terminate prematurely
+ # on error
+ } elsif (my $user_cb = delete($self->{user_cb})) {
+ do_finish($self, $user_cb);
+ } else {
+ die 'about to call user_cb twice'; # Oops :x
+ }
+ }; # eval
+ my $err = $@;
+ if ($err) {
+ $err =~ s/^\s*Exception:\s*//; # bad word to show users :P
+ dbg($self, "E: $err");
+ my $ucb = delete($self->{user_cb});
+ eval { $ucb->($err) } if $ucb;
+ }
+}
+
+sub step_cb ($) {
+ my ($self) = @_;
+ sub { do_step($self) };
+}
+
+sub next_step ($) {
+ my ($self) = @_;
+ # if outside of public-inbox-httpd, caller is expected to be
+ # looping step_cb, anyways
+ my $async = $self->{psgi_env}->{'pi-httpd.async'} or return;
+ # PublicInbox::HTTPD::Async->new
+ $async->(undef, step_cb($self));
+}
+
+sub mark_found ($$$) {
+ my ($self, $oid, $found_info) = @_;
+ my $found = $self->{found};
+ $found->{$oid} = $found_info;
+ my $oid_cur = $found_info->[1];
+ while ($oid_cur ne $oid && length($oid_cur) > $OID_MIN) {
+ $found->{$oid_cur} = $found_info;
+ chop($oid_cur);
+ }
+}
+
+sub parse_ls_files ($$$$) {
+ my ($self, $qsp, $bref, $di) = @_;
+ if (my $err = $qsp->{err}) {
+ die "git ls-files error: $err";
+ }
+
+ my ($line, @extra) = split(/\0/, $$bref);
+ scalar(@extra) and die "BUG: extra files in index: <",
+ join('> <', @extra), ">";
+
+ my ($info, $file) = split(/\t/, $line, 2);
+ my ($mode_b, $oid_b_full, $stage) = split(/ /, $info);
+ if ($file ne $di->{path_b}) {
+ die
+"BUG: index mismatch: file=$file != path_b=$di->{path_b}";
+ }
+
+ my $tmp_git = $self->{tmp_git} or die 'no git working tree';
+ my (undef, undef, $size) = $tmp_git->check($oid_b_full);
+ defined($size) or die "check $oid_b_full failed";
+
+ dbg($self, "index at:\n$mode_b $oid_b_full\t$file");
+ my $created = [ $tmp_git, $oid_b_full, 'blob', $size, $di ];
+ mark_found($self, $di->{oid_b}, $created);
+ next_step($self); # onto the next patch
+}
+
+sub start_ls_files ($$) {
+ my ($self, $di) = @_;
+ my $cmd = [qw(git ls-files -s -z)];
+ my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env});
+ $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+ my ($bref) = @_;
+ eval { parse_ls_files($self, $qsp, $bref, $di) };
+ ERR($self, $@) if $@;
+ });
+}
+
+sub do_git_apply ($) {
+ my ($self) = @_;
+ my $dn = $self->{tmp}->dirname;
+ my $patches = $self->{patches};
+
+ # we need --ignore-whitespace because some patches are CRLF
+ my @cmd = (qw(git -C), $dn, qw(apply --cached --ignore-whitespace
+ --whitespace=warn --verbose));
+ my $len = length(join(' ', @cmd));
+ my $total = $self->{tot};
+ my $di; # keep track of the last one for "git ls-files"
+
+ do {
+ my $i = ++$self->{nr};
+ $di = shift @$patches;
+ dbg($self, "\napplying [$i/$total] " . di_url($self, $di) .
+ "\n" . join('', @{$di->{hdr_lines}}));
+ my $path = $total + 1 - $i;
+ $len += length($path) + 1;
+ push @cmd, $path;
+ } while (@$patches && $len < $ARG_SIZE_MAX);
+
+ my $rdr = { 2 => 1 };
+ my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $rdr);
+ $qsp->psgi_qx($self->{psgi_env}, undef, sub {
+ my ($bref) = @_;
+ dbg($self, $$bref);
+ if (my $err = $qsp->{err}) {
+ ERR($self, "git apply error: $err");
+ }
+ eval { start_ls_files($self, $di) };
+ ERR($self, $@) if $@;
+ });
+}
+
+sub di_url ($$) {
+ my ($self, $di) = @_;
+ # note: we don't pass the PSGI env unconditionally, here,
+ # different inboxes can have different HTTP_HOST on the same instance.
+ my $ibx = $di->{ibx};
+ my $env = $self->{psgi_env} if $ibx eq $self->{inboxes}->[0];
+ my $url = $ibx->base_url($env);
+ my $mid = $di->{smsg}->{mid};
+ defined($url) ? "$url$mid/" : "<$mid>";
+}
+
+sub resolve_patch ($$) {
+ my ($self, $want) = @_;
+
+ if (scalar(@{$self->{patches}}) > $MAX_PATCH) {
+ die "Aborting, too many steps to $self->{oid_want}";
+ }
+
+ # see if we can find the blob in an existing git repo:
+ my $cur_want = $want->{oid_b};
+ if ($self->{seen_oid}->{$cur_want}++) {
+ die "Loop detected solving $cur_want\n";
+ }
+ if (my $existing = solve_existing($self, $want)) {
+ dbg($self, "found $cur_want in " .
+ join("\n", $existing->[0]->pub_urls));
+
+ if ($cur_want eq $self->{oid_want}) { # all done!
+ eval { delete($self->{user_cb})->($existing) };
+ die "E: $@" if $@;
+ return;
+ }
+ mark_found($self, $cur_want, $existing);
+ return next_step($self); # onto patch application
+ }
+
+ # scan through inboxes to look for emails which results in
+ # the oid we want:
+ my $di;
+ foreach my $ibx (@{$self->{inboxes}}) {
+ $di = find_extract_diff($self, $ibx, $want) or next;
+
+ unshift @{$self->{patches}}, $di;
+ dbg($self, "found $cur_want in ".di_url($self, $di));
+
+ # good, we can find a path to the oid we $want, now
+ # lets see if we need to apply more patches:
+ my $src = $di->{oid_a};
+
+ unless ($src =~ /\A0+\z/) {
+ # we have to solve it using another oid, fine:
+ my $job = { oid_b => $src, path_b => $di->{path_a} };
+ push @{$self->{todo}}, $job;
+ }
+ return next_step($self); # onto the next todo item
+ }
+ if (length($cur_want) > $OID_MIN) {
+ chop($cur_want);
+ dbg($self, "retrying $want->{oid_b} as $cur_want");
+ $want->{oid_b} = $cur_want;
+ push @{$self->{todo}}, $want;
+ return next_step($self); # retry with shorter abbrev
+ }
+
+ dbg($self, "could not find $cur_want");
+ eval { delete($self->{user_cb})->(undef) }; # not found! :<
+ die "E: $@" if $@;
+}
+
+# this API is designed to avoid creating self-referential structures;
+# so user_cb never references the SolverGit object
+sub new {
+ my ($class, $ibx, $user_cb) = @_;
+
+ bless {
+ gits => $ibx->{-repo_objs},
+ user_cb => $user_cb,
+
+ # TODO: config option for searching related inboxes
+ inboxes => [ $ibx ],
+ }, $class;
+}
+
+# recreate $oid_want using $hints
+# Calls {user_cb} with: [ ::Git object, oid_full, type, size, di (diff_info) ]
+# with found object, or undef if nothing was found
+# Calls {user_cb} with a string error on fatal errors
+sub solve ($$$$$) {
+ my ($self, $env, $out, $oid_want, $hints) = @_;
+
+ # should we even get here? Probably not, but somebody
+ # could be manually typing URLs:
+ return (delete $self->{user_cb})->(undef) if $oid_want =~ /\A0+\z/;
+
+ $self->{oid_want} = $oid_want;
+ $self->{out} = $out;
+ $self->{seen_oid} = {};
+ $self->{tot} = 0;
+ $self->{psgi_env} = $env;
+ $self->{todo} = [ { %$hints, oid_b => $oid_want } ];
+ $self->{patches} = []; # [ $di, $di, ... ]
+ $self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] }
+ $self->{tmp} = File::Temp->newdir('solver.tmp-XXXXXXXX', TMPDIR => 1);
+
+ dbg($self, "solving $oid_want ...");
+ my $step_cb = step_cb($self);
+ if (my $async = $env->{'pi-httpd.async'}) {
+ # PublicInbox::HTTPD::Async->new
+ $async->(undef, $step_cb);
+ } else {
+ $step_cb->() while $self->{user_cb};
+ }
+}
+
+1;
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Self-updating module containing a sample CSS for client-side
+# customization by users of public-inbox. Used by Makefile.PL
+package PublicInbox::UserContent;
+use strict;
+use warnings;
+
+# this sub is updated automatically:
+sub CSS () {
+ <<'_'
+ /*
+ * Dark color scheme using 216 web-safe colors, inspired
+ * somewhat by the default color scheme in mutt.
+ * It reduces eyestrain for me, and energy usage for all:
+ * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
+ */
+ * { background:#000; color:#ccc }
+
+ /*
+ * Underlined links add visual noise which make them hard-to-read.
+ * Use colors to make them stand out, instead.
+ */
+ a { color:#69f; text-decoration:none }
+ a:visited { color:#96f }
+
+ /* quoted text gets a different color */
+ *.q { color:#09f }
+
+ /*
+ * these may be used with cgit, too
+ * (cgit uses <div>, public-inbox uses <span>)
+ */
+ *.add { color:#0ff }
+ *.del { color:#f0f }
+ *.head { color:#fff }
+ *.hunk { color:#c93 }
+
+ /*
+ * highlight 3.x colors (tested 3.18)
+ * this doesn't use most of the colors available (I find too many
+ * colors overwhelming). So the #ccc default is commented out.
+ */
+ .hl.num { color:#f30 } /* number */
+ .hl.esc { color:#f0f } /* escape character */
+ .hl.str { color:#f30 } /* string */
+ .hl.ppc { color:#f0f } /* preprocessor */
+ .hl.pps { color:#f30 } /* preprocessor string */
+ .hl.slc { color:#09f } /* single-line comment */
+ .hl.com { color:#09f }
+ /* .hl.opt { color:#ccc } */
+ /* .hl.ipl { color:#ccc } */
+ /* .hl.lin { color:#ccc } */
+ .hl.kwa { color:#ff0 }
+ .hl.kwb { color:#0f0 }
+ .hl.kwc { color:#ff0 }
+ /* .hl.kwd { color:#ccc } */
+_
+}
+# end of auto-updated sub
+
+# return a sample CSS
+sub sample ($$) {
+ my ($ibx, $env) = @_;
+ my $url_prefix = $ibx->base_url($env);
+ my $preamble = <<"";
+/*
+ * Firefox users: this goes in \$PROFILE_FOLDER/chrome/userContent.css
+ * where \$PROFILE_FOLDER is platform-specific
+ *
+ * cf. http://kb.mozillazine.org/UserContent.css
+ * http://kb.mozillazine.org/Profile_folder_-_Firefox
+ *
+ * Users of dillo can remove the entire lines with "moz-only"
+ * in them and place the resulting file in ~/.dillo/style.css
+ */
+\@-moz-document url-prefix($url_prefix) { /* moz-only */
+
+ $preamble . CSS() . "\n} /* moz-only */\n";
+}
+
+# Auto-update this file based on the contents of a CSS file:
+# usage: perl -I lib __FILE__ contrib/css/216dark.css
+# (See Makefile.PL)
+if (scalar(@ARGV) == 1 && -r __FILE__) {
+ use autodie;
+ open my $ro, '<', $ARGV[0];
+ my $css = do { local $/; <$ro> };
+ $css =~ s/^([ \t]*\S)/\t$1/smg;
+ open my $rw, '+<', __FILE__;
+ my $out = do { local $/; <$rw> };
+ $out =~ s/^sub CSS.*^_\n\}/sub CSS () {\n\t<<'_'\n${css}_\n}/sm;
+ seek $rw, 0, 0;
+ print $rw $out;
+}
+
+1;
use PublicInbox::Address;
use PublicInbox::WwwStream;
use PublicInbox::Reply;
+use PublicInbox::ViewDiff qw(flush_diff);
require POSIX;
use Time::Local qw(timegm);
my ($ctx, $mime, $more, $smsg) = @_;
my $hdr = $mime->header_obj;
my $ibx = $ctx->{-inbox};
- my $obfs_ibx = $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
+ $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
my $tip = _msg_html_prepare($hdr, $ctx, $more, 0);
my $end = 2;
PublicInbox::WwwStream->response($ctx, 200, sub {
if ($nr == 1) {
# $more cannot be true w/o $smsg being defined:
my $upfx = $more ? '../'.mid_escape($smsg->mid).'/' : '';
- $tip . multipart_text_as_html($mime, $upfx, $obfs_ibx) .
+ $tip . multipart_text_as_html($mime, $upfx, $ibx) .
'</pre><hr>'
} elsif ($more && @$more) {
++$end;
my $str = eval {
my ($id, $prev, $smsg) = @$more;
my $mid = $ctx->{mid};
- $smsg = $ctx->{-inbox}->smsg_mime($smsg);
+ my $ibx = $ctx->{-inbox};
+ $smsg = $ibx->smsg_mime($smsg);
my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev);
@$more = $next ? ($id, $prev, $next) : ();
if ($smsg) {
my $mime = $smsg->{mime};
my $upfx = '../' . mid_escape($smsg->mid) . '/';
_msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
- multipart_text_as_html($mime, $upfx,
- $ctx->{-obfs_ibx}) .
+ multipart_text_as_html($mime, $upfx, $ibx) .
'</pre><hr>'
} else {
'';
$rv .= "\n";
# scan through all parts, looking for displayable text
- msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs_ibx, $_[0]) });
+ my $ibx = $ctx->{-inbox};
+ msg_iter($mime, sub { $rv .= add_text_body($mhref, $ibx, $_[0]) });
# add the footer
$rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
}
sub multipart_text_as_html {
- my ($mime, $upfx, $obfs_ibx) = @_;
+ my ($mime, $upfx, $ibx) = @_;
my $rv = "";
# scan through all parts, looking for displayable text
- msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs_ibx, $_[0]) });
+ msg_iter($mime, sub { $rv .= add_text_body($upfx, $ibx, $_[0]) });
$rv;
}
}
sub add_text_body {
- my ($upfx, $obfs_ibx, $p) = @_;
+ my ($upfx, $ibx, $p) = @_;
+ my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
# $p - from msg_iter: [ Email::MIME, depth, @idx ]
my ($part, $depth) = @$p; # attachment @idx is unused
my $ct = $part->content_type || 'text/plain';
return attach_link($upfx, $ct, $p, $fn) unless defined $s;
+ # makes no difference to browsers, and don't screw up filename
+ # link generation in diffs with the extra '%0D'
+ $s =~ s/\r\n/\n/sg;
+
+ my ($diff, $spfx);
+ if ($s =~ /^(?:diff|---|\+{3}) /ms) {
+ $diff = [];
+ if ($ibx->{-repo_objs}) {
+ my $n_slash = $upfx =~ tr!/!/!;
+ if ($n_slash == 0) {
+ $spfx = '../';
+ } elsif ($n_slash == 1) {
+ $spfx = '';
+ } else { # nslash == 2
+ $spfx = '../../';
+ }
+ }
+ };
+
my @lines = split(/^/m, $s);
$s = '';
if (defined($fn) || $depth > 0 || $err) {
# show the previously buffered quote inline
flush_quote(\$s, $l, \@quot) if @quot;
- # regular line, OK
- $l->linkify_1($cur);
- $s .= $l->linkify_2(ascii_html($cur));
+ if ($diff) {
+ push @$diff, $cur;
+ } else {
+ # regular line, OK
+ $l->linkify_1($cur);
+ $s .= $l->linkify_2(ascii_html($cur));
+ }
} else {
+ flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
push @quot, $cur;
}
}
if (@quot) { # ugh, top posted
flush_quote(\$s, $l, \@quot);
+ flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
$s;
} else {
+ flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
if ($s =~ /\n\z/s) { # common, last line ends with a newline
$s;
PublicInbox::ExtMsg::ext_msg($ctx);
}
-sub _msg_date {
- my ($hdr) = @_;
- fmt_ts(msg_datestamp($hdr));
-}
-
sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
sub dedupe_subject {
my $mbox = qq(<a\nhref="$href/t.mbox.gz">mbox.gz</a>);
my $atom = qq(<a\nhref="$href/t.atom">Atom</a>);
- my $s = "<a\nhref=\"$href/T/$anchor\"><b>$top</b></a>\n" .
+ my $s = "<a\nhref=\"$href/T/$anchor\">$top</a>\n" .
" $ds UTC $n - $mbox / $atom\n";
for (my $i = 0; $i < scalar(@ex); $i += 2) {
my $level = $ex[$i];
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# used by PublicInbox::View
+# This adds CSS spans for diff highlighting.
+# It also generates links for ViewVCS + SolverGit to show
+# (or reconstruct) blobs.
+
+package PublicInbox::ViewDiff;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(flush_diff);
+use URI::Escape qw(uri_escape_utf8);
+use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Git qw(git_unquote);
+
+sub DSTATE_INIT () { 0 }
+sub DSTATE_STAT () { 1 } # TODO
+sub DSTATE_HEAD () { 2 } # /^diff --git /, /^index /, /^--- /, /^\+\+\+ /
+sub DSTATE_CTX () { 3 } # /^ /
+sub DSTATE_ADD () { 4 } # /^\+/
+sub DSTATE_DEL () { 5 } # /^\-/
+my @state2class = (
+ '', # init
+ '', # stat
+ 'head',
+ '', # ctx
+ 'add',
+ 'del'
+);
+
+sub UNSAFE () { "^A-Za-z0-9\-\._~/" }
+
+my $OID_NULL = '0{7,40}';
+my $OID_BLOB = '[a-f0-9]{7,40}';
+my $PATH_A = '"?a/.+|/dev/null';
+my $PATH_B = '"?b/.+|/dev/null';
+
+sub to_html ($$) {
+ $_[0]->linkify_1($_[1]);
+ $_[0]->linkify_2(ascii_html($_[1]));
+}
+
+# link to line numbers in blobs
+sub diff_hunk ($$$$) {
+ my ($dctx, $spfx, $ca, $cb) = @_;
+ my $oid_a = $dctx->{oid_a};
+ my $oid_b = $dctx->{oid_b};
+
+ (defined($spfx) && defined($oid_a) && defined($oid_b)) or
+ return "@@ $ca $cb @@";
+
+ my ($n) = ($ca =~ /^-(\d+)/);
+ $n = defined($n) ? do { ++$n; "#n$n" } : '';
+
+ my $rv = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
+
+ ($n) = ($cb =~ /^\+(\d+)/);
+ $n = defined($n) ? do { ++$n; "#n$n" } : '';
+
+ $rv .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
+}
+
+sub oid ($$$) {
+ my ($dctx, $spfx, $oid) = @_;
+ defined($spfx) ? qq(<a\nhref="$spfx$oid/s/$dctx->{Q}">$oid</a>) : $oid;
+}
+
+sub to_state ($$$) {
+ my ($dst, $state, $new_state) = @_;
+ $$dst .= '</span>' if $state2class[$state];
+ $_[1] = $new_state;
+ my $class = $state2class[$new_state] or return;
+ $$dst .= qq(<span\nclass="$class">);
+}
+
+sub flush_diff ($$$$) {
+ my ($dst, $spfx, $linkify, $diff) = @_;
+ my $state = DSTATE_INIT;
+ my $dctx = { Q => '' }; # {}, keys: oid_a, oid_b, path_a, path_b
+
+ foreach my $s (@$diff) {
+ if ($s =~ /^ /) {
+ if ($state2class[$state]) {
+ to_state($dst, $state, DSTATE_CTX);
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^-- $/) { # email signature begins
+ $state == DSTATE_INIT or
+ to_state($dst, $state, DSTATE_INIT);
+ $$dst .= $s;
+ } elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!) {
+ if ($state != DSTATE_HEAD) {
+ my ($pa, $pb) = ($1, $2);
+ to_state($dst, $state, DSTATE_HEAD);
+ $pa = (split('/', git_unquote($pa), 2))[1];
+ $pb = (split('/', git_unquote($pb), 2))[1];
+ $dctx = {
+ Q => "?b=".uri_escape_utf8($pb, UNSAFE),
+ };
+ if ($pa ne $pb) {
+ $dctx->{Q} .=
+ "&a=".uri_escape_utf8($pa, UNSAFE);
+ }
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
+ $$dst .= $1 . oid($dctx, $spfx, $2);
+ $dctx = { Q => '' };
+ $$dst .= to_html($linkify, $s) ;
+ } elsif ($s =~ s/^index ($OID_BLOB)(\.\.$OID_NULL)\b//o) {
+ $$dst .= 'index ' . oid($dctx, $spfx, $1) . $2;
+ $dctx = { Q => '' };
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) {
+ $dctx->{oid_a} = $1;
+ $dctx->{oid_b} = $2;
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
+ $$dst .= '</span>' if $state2class[$state];
+ $$dst .= qq(<span\nclass="hunk">);
+ $$dst .= diff_hunk($dctx, $spfx, $1, $2);
+ $$dst .= '</span>';
+ $state = DSTATE_CTX;
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ m!^--- $PATH_A! ||
+ $s =~ m!^\+{3} $PATH_B!) {
+ # color only (no oid link)
+ $state == DSTATE_INIT and
+ to_state($dst, $state, DSTATE_HEAD);
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^\+/) {
+ if ($state != DSTATE_ADD && $state != DSTATE_INIT) {
+ to_state($dst, $state, DSTATE_ADD);
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^-/) {
+ if ($state != DSTATE_DEL && $state != DSTATE_INIT) {
+ to_state($dst, $state, DSTATE_DEL);
+ }
+ $$dst .= to_html($linkify, $s);
+ # ignore the following lines in headers:
+ } elsif ($s =~ /^(?:dis)similarity index/ ||
+ $s =~ /^(?:old|new) mode/ ||
+ $s =~ /^(?:deleted|new) file mode/ ||
+ $s =~ /^(?:copy|rename) (?:from|to) / ||
+ $s =~ /^(?:dis)?similarity index /) {
+ $$dst .= to_html($linkify, $s);
+ } else {
+ $state == DSTATE_INIT or
+ to_state($dst, $state, DSTATE_INIT);
+ $$dst .= to_html($linkify, $s);
+ }
+ }
+ @$diff = ();
+ $$dst .= '</span>' if $state2class[$state];
+ undef;
+}
+
+1;
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# show any VCS object, similar to "git show"
+# FIXME: we only show blobs for now
+#
+# This can use a "solver" to reconstruct blobs based on git
+# patches (with abbreviated OIDs in the header). However, the
+# abbreviated OIDs must match exactly what's in the original
+# email (unless a normal code repo already has the blob).
+#
+# In other words, we can only reliably reconstruct blobs based
+# on links generated by ViewDiff (and only if the emailed
+# patches apply 100% cleanly to published blobs).
+
+package PublicInbox::ViewVCS;
+use strict;
+use warnings;
+use Encode qw(find_encoding);
+use PublicInbox::SolverGit;
+use PublicInbox::WwwStream;
+use PublicInbox::Linkify;
+use PublicInbox::Hval qw(ascii_html to_filename src_escape);
+my $hl = eval {
+ require PublicInbox::HlMod;
+ PublicInbox::HlMod->new;
+};
+
+# we need to trigger highlight::CodeGenerator::deleteInstance
+# in HlMod::DESTROY before the rest of Perl shuts down to avoid
+# a segfault at shutdown
+END { $hl = undef };
+
+my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
+my $max_size = 1024 * 1024; # TODO: configurable
+my $enc_utf8 = find_encoding('UTF-8');
+my $BIN_DETECT = 8000; # same as git
+
+sub html_page ($$$) {
+ my ($ctx, $code, $strref) = @_;
+ my $wcb = delete $ctx->{-wcb};
+ $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
+ my $res = PublicInbox::WwwStream->response($ctx, $code, sub {
+ my ($nr, undef) = @_;
+ $nr == 1 ? $$strref : undef;
+ });
+ $wcb ? $wcb->($res) : $res;
+}
+
+sub stream_large_blob ($$$$) {
+ my ($ctx, $res, $logref, $fn) = @_;
+ my ($git, $oid, $type, $size, $di) = @$res;
+ my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid];
+ my $qsp = PublicInbox::Qspawn->new($cmd);
+ my @cl = ('Content-Length', $size);
+ my $env = $ctx->{env};
+ $env->{'qspawn.wcb'} = delete $ctx->{-wcb};
+ $qsp->psgi_return($env, undef, sub {
+ my ($r, $bref) = @_;
+ if (!defined $r) { # error
+ html_page($ctx, 500, $logref);
+ } elsif (index($$bref, "\0") >= 0) {
+ my $ct = 'application/octet-stream';
+ [200, ['Content-Type', $ct, @cl ] ];
+ } else {
+ my $n = bytes::length($$bref);
+ if ($n >= $BIN_DETECT || $n == $size) {
+ my $ct = 'text/plain; charset=UTF-8';
+ return [200, ['Content-Type', $ct, @cl] ];
+ }
+ undef; # bref keeps growing
+ }
+ });
+}
+
+sub solve_result {
+ my ($ctx, $res, $log, $hints, $fn) = @_;
+
+ unless (seek($log, 0, 0)) {
+ $ctx->{env}->{'psgi.errors'}->print("seek(log): $!\n");
+ return html_page($ctx, 500, \'seek error');
+ }
+ $log = do { local $/; <$log> };
+
+ my $ref = ref($res);
+ my $l = PublicInbox::Linkify->new;
+ $l->linkify_1($log);
+ $log = '<pre>debug log:</pre><hr /><pre>' .
+ $l->linkify_2(ascii_html($log)) . '</pre>';
+
+ $res or return html_page($ctx, 404, \$log);
+ $ref eq 'ARRAY' or return html_page($ctx, 500, \$log);
+
+ my ($git, $oid, $type, $size, $di) = @$res;
+ my $path = to_filename($di->{path_b} || $hints->{path_b} || 'blob');
+ my $raw_link = "(<a\nhref=$path>raw</a>)";
+ if ($size > $max_size) {
+ return stream_large_blob($ctx, $res, \$log, $fn) if defined $fn;
+ $log = "<pre><b>Too big to show, download available</b>\n" .
+ "$oid $type $size bytes $raw_link</pre>" . $log;
+ return html_page($ctx, 500, \$log);
+ }
+
+ my $blob = $git->cat_file($oid);
+ if (!$blob) { # WTF?
+ my $e = "Failed to retrieve generated blob ($oid)";
+ $ctx->{env}->{'psgi.errors'}->print("$e ($git->{git_dir})\n");
+ $log = "<pre><b>$e</b></pre>" . $log;
+ return html_page($ctx, 500, \$log);
+ }
+
+ my $binary = index($$blob, "\0") >= 0;
+ if ($fn) {
+ my $h = [ 'Content-Length', $size, 'Content-Type' ];
+ push(@$h, ($binary ? 'application/octet-stream' : 'text/plain'));
+ return delete($ctx->{-wcb})->([200, $h, [ $$blob ]]);
+ }
+
+ if ($binary) {
+ $log = "<pre>$oid $type $size bytes (binary)" .
+ " $raw_link</pre>" . $log;
+ return html_page($ctx, 200, \$log);
+ }
+
+ $$blob = $enc_utf8->decode($$blob);
+ my $nl = ($$blob =~ tr/\n/\n/);
+ my $pad = length($nl);
+
+ $l->linkify_1($$blob);
+ my $ok = $hl->do_hl($blob, $path) if $hl;
+ if ($ok) {
+ $$ok = $enc_utf8->decode($$ok);
+ src_escape($$ok);
+ $blob = $ok;
+ } else {
+ $$blob = ascii_html($$blob);
+ }
+
+ # using some of the same CSS class names and ids as cgit
+ $log = "<pre>$oid $type $size bytes $raw_link</pre>" .
+ "<hr /><table\nclass=blob>".
+ "<tr><td\nclass=linenumbers><pre>" . join('', map {
+ sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_)
+ } (1..$nl)) . '</pre></td>' .
+ '<td><pre> </pre></td>'. # pad for non-CSS users
+ "<td\nclass=lines><pre\nstyle='white-space:pre'><code>" .
+ $l->linkify_2($$blob) .
+ '</code></pre></td></tr></table>' . $log;
+
+ html_page($ctx, 200, \$log);
+}
+
+sub show ($$;$) {
+ my ($ctx, $oid_b, $fn) = @_;
+ my $qp = $ctx->{qp};
+ my $hints = {};
+ while (my ($from, $to) = each %QP_MAP) {
+ defined(my $v = $qp->{$from}) or next;
+ $hints->{$to} = $v;
+ }
+
+ open my $log, '+>', undef or die "open: $!";
+ my $solver = PublicInbox::SolverGit->new($ctx->{-inbox}, sub {
+ solve_result($ctx, $_[0], $log, $hints, $fn);
+ });
+
+ # PSGI server will call this and give us a callback
+ sub {
+ $ctx->{-wcb} = $_[0]; # HTTP write callback
+ $solver->solve($ctx->{env}, $log, $oid_b, $hints);
+ };
+}
+
+1;
# We focus on the lowest common denominators here:
# - targeted at text-only console browsers (w3m, links, etc..)
# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs
+# and diff/syntax-highlighting (optional)
# - No JavaScript, graphics or icons allowed.
# - Must not rely on static content
# - UTF-8 is only for user-content, 7-bit US-ASCII for us
use PublicInbox::MID qw(mid_escape);
require PublicInbox::Git;
use PublicInbox::GitHTTPBackend;
-our $INBOX_RE = qr!\A/([\w\.\-]+)!;
+use PublicInbox::UserContent;
+
+# TODO: consider a routing tree now that we have more endpoints:
+our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
our $MID_RE = qr!([^/]+)!;
our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i;
+our $OID_RE = qr![a-f0-9]{7,40}!;
sub new {
my ($class, $pi_config) = @_;
r301($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
get_text($ctx, $1, $2);
-
+ } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) {
+ get_css($ctx, $1, $2);
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
+ get_vcs_object($ctx, $1, $2);
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) {
+ get_vcs_object($ctx, $1, $2, $3);
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
+ r301($ctx, $1, $2, 's/');
# convenience redirects order matters
} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
r301($ctx, $1, $2);
# for CoW-friendliness, MOOOOO!
sub preload {
+ my ($self) = @_;
require PublicInbox::Feed;
require PublicInbox::View;
require PublicInbox::SearchThread;
PublicInbox::NewsWWW)) {
eval "require $_;";
}
+ if (ref($self)) {
+ $self->stylesheets_prepare($_) for ('', '../', '../../');
+ }
}
# private functions below
PublicInbox::WwwText::get_text($ctx, $key);
}
+# show git objects (blobs and commits)
+# /$INBOX/_/$OBJECT_ID/show
+# /$INBOX/_/${OBJECT_ID}_${FILENAME}
+# KEY may contain slashes
+sub get_vcs_object ($$$;$) {
+ my ($ctx, $inbox, $oid, $filename) = @_;
+ my $r404 = invalid_inbox($ctx, $inbox);
+ return $r404 if $r404;
+ require PublicInbox::ViewVCS;
+ PublicInbox::ViewVCS::show($ctx, $oid, $filename);
+}
+
sub ctx_get {
my ($ctx, $key) = @_;
my $val = $ctx->{$key};
PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn);
}
+# User-generated content (UGC) may have excessively long lines
+# and screw up rendering on some browsers, so we use pre-wrap.
+#
+# We also force everything to the same scaled font-size because GUI
+# browsers (tested both Firefox and surf (webkit)) uses a larger font
+# for the Search <form> element than the rest of the page. Font size
+# uniformity is important to people who rely on gigantic fonts.
+# Finally, we use monospace to ensure the Search field and button
+# has the same size and spacing as everything else which is
+# <pre>-formatted anyways.
+our $STYLE = 'pre{white-space:pre-wrap}*{font-size:100%;font-family:monospace}';
+
+sub stylesheets_prepare ($$) {
+ my ($self, $upfx) = @_;
+ my $mini = eval {
+ require CSS::Minifier;
+ sub { CSS::Minifier::minify(input => $_[0]) };
+ } || eval {
+ require CSS::Minifier::XS;
+ sub { CSS::Minifier::XS::minify($_[0]) };
+ } || sub { $_[0] };
+
+ my $css_map = {};
+ my $stylesheets = $self->{pi_config}->{css} || [];
+ my $links = [];
+ my $inline_ok = 1;
+
+ foreach my $s (@$stylesheets) {
+ my $attr = {};
+ local $_ = $s;
+ foreach my $k (qw(media title href)) {
+ if (s/\s*$k='([^']+)'// || s/\s*$k=(\S+)//) {
+ $attr->{$k} = $1;
+ }
+ }
+
+ if (defined $attr->{href}) {
+ $inline_ok = 0;
+ } else {
+ open(my $fh, '<', $_) or do {
+ warn "failed to open $_: $!\n";
+ next;
+ };
+ my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
+ my $ctime = 0;
+ my $local = do { local $/; <$fh> };
+ if ($local =~ /\S/) {
+ $ctime = sprintf('%x',(stat($fh))[10]);
+ $local = $mini->($local);
+ }
+ $css_map->{$key} = $local;
+ $attr->{href} = "$upfx$key.css?$ctime";
+ if (defined($attr->{title})) {
+ $inline_ok = 0;
+ } elsif (($attr->{media}||'screen') eq 'screen') {
+ $attr->{-inline} = $local;
+ }
+ }
+ push @$links, $attr;
+ }
+
+ my $buf = "<style>$STYLE";
+ if ($inline_ok) {
+ my @ext; # for media=print and whatnot
+ foreach my $attr (@$links) {
+ if (defined(my $str = delete $attr->{-inline})) {
+ $buf .= $str;
+ } else {
+ push @ext, $attr;
+ }
+ }
+ $links = \@ext;
+ }
+ $buf .= '</style>';
+
+ if (@$links) {
+ foreach my $attr (@$links) {
+ delete $attr->{-inline};
+ $buf .= "<link\ntype=text/css\nrel=stylesheet";
+ while (my ($k, $v) = each %$attr) {
+ $v = qq{"$v"} if $v =~ /[\s=]/;
+ $buf .= qq{\n$k=$v};
+ }
+ $buf .= ' />';
+ }
+ $self->{"-style-$upfx"} = $buf;
+ } else {
+ $self->{-style_inline} = $buf;
+ }
+ $self->{-css_map} = $css_map;
+}
+
+# returns an HTML fragment with <style> or <link> tags in them
+# Called by WwwStream by nearly every HTML page
+sub style {
+ my ($self, $upfx) = @_;
+ $self->{-style_inline} || $self->{"-style-$upfx"} || do {
+ stylesheets_prepare($self, $upfx);
+ $self->{-style_inline} || $self->{"-style-$upfx"}
+ };
+}
+
+# /$INBOX/$KEY.css endpoint
+# CSS is configured globally for all inboxes, but we access them on
+# a per-inbox basis. This allows administrators to setup per-inbox
+# static routes to intercept the request before it hits PSGI
+sub get_css ($$$) {
+ my ($ctx, $inbox, $key) = @_;
+ my $r404 = invalid_inbox($ctx, $inbox);
+ return $r404 if $r404;
+ my $self = $ctx->{www};
+ my $css_map = $self->{-css_map} || stylesheets_prepare($self, '');
+ my $css = $css_map->{$key};
+ if (!defined($css) && $key eq 'userContent') {
+ my $env = $ctx->{env};
+ $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+ }
+ defined $css or return r404();
+ my $h = [ 'Content-Length', bytes::length($css),
+ 'Content-Type', 'text/css' ];
+ PublicInbox::GitHTTPBackend::cache_one_year($h);
+ [ 200, $h, [ $css ] ];
+}
+
1;
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Standalone PSGI app to provide syntax highlighting as-a-service
+# via "highlight" Perl module ("libhighlight-perl" in Debian).
+#
+# This allows exposing highlight as a persistent HTTP service for
+# other scripts via HTTP PUT requests. PATH_INFO will be used
+# as a hint for detecting the language for highlight.
+#
+# The following example using curl(1) will do the right thing
+# regarding the file extension:
+#
+# curl -HExpect: -T /path/to/file http://example.com/
+#
+# You can also force a file extension by giving a path
+# (in this case, "c") via:
+#
+# curl -HExpect: -T /path/to/file http://example.com/x.c
+
+package PublicInbox::WwwHighlight;
+use strict;
+use warnings;
+use HTTP::Status qw(status_message);
+use parent qw(PublicInbox::HlMod);
+
+# TODO: support highlight(1) for distros which don't package the
+# SWIG extension. Also, there may be admins who don't want to
+# have ugly SWIG-generated code in a long-lived Perl process.
+
+sub r ($) {
+ my ($code) = @_;
+ my $msg = status_message($code);
+ my $len = length($msg);
+ [ $code, [qw(Content-Type text/plain Content-Length), $len], [$msg] ]
+}
+
+# another slurp API hogging up all my memory :<
+# This is capped by whatever the PSGI server allows,
+# $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} for PublicInbox::HTTP (10 MB)
+sub read_in_full ($) {
+ my ($env) = @_;
+
+ my $in = $env->{'psgi.input'};
+ my $off = 0;
+ my $buf = '';
+ my $len = $env->{CONTENT_LENGTH} || 8192;
+ while (1) {
+ my $r = $in->read($buf, $len, $off);
+ last unless defined $r;
+ return \$buf if $r == 0;
+ $off += $r;
+ }
+ $env->{'psgi.errors'}->print("input read error: $!\n");
+ undef;
+}
+
+# entry point for PSGI
+sub call {
+ my ($self, $env) = @_;
+ my $req_method = $env->{REQUEST_METHOD};
+
+ return r(405) if $req_method ne 'PUT';
+
+ my $bref = read_in_full($env) or return r(500);
+ $bref = $self->do_hl($bref, $env->{PATH_INFO});
+
+ my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+ push @$h, 'Content-Length', bytes::length($$bref);
+
+ [ 200, $h, [ $$bref ] ]
+}
+
+1;
my $title = $ctx->{-title_html} || $desc;
my $upfx = $ctx->{-upfx} || '';
my $help = $upfx.'_/text/help';
+ my $color = $upfx.'_/text/color';
my $atom = $ctx->{-atom} || $upfx.'new.atom';
my $tip = $ctx->{-html_tip} || '';
my $top = "<b>$desc</b>";
my $links = "<a\nhref=\"$help\">help</a> / ".
+ "<a\nhref=\"$color\">color</a> / ".
"<a\nhref=\"$atom\">Atom feed</a>";
if ($obj->search) {
my $q_val = $ctx->{-q_value_html};
"<html><head><title>$title</title>" .
"<link\nrel=alternate\ntitle=\"Atom feed\"\n".
"href=\"$atom\"\ntype=\"application/atom+xml\"/>" .
- PublicInbox::Hval::STYLE .
+ $ctx->{www}->style($upfx) .
"</head><body>". $top . $tip;
}
1;
}
+sub _colors_help ($$) {
+ my ($ctx, $txt) = @_;
+ my $ibx = $ctx->{-inbox};
+ my $base_url = $ibx->base_url($ctx->{env});
+ $$txt .= "color customization for $base_url\n";
+ $$txt .= <<EOF;
+
+public-inbox provides a stable set of CSS classes for users to
+customize colors for highlighting diffs and code.
+
+Users of browsers such as dillo, Firefox, or some browser
+extensions may start by downloading the following sample CSS file
+to control the colors they see:
+
+ ${base_url}userContent.css
+
+CSS classes
+-----------
+
+ span.q - quoted text in email messages
+
+For diff highlighting, we try to match class names with those
+used by cgit: https://git.zx2c4.com/cgit/
+
+ span.add - diff post-image lines
+
+ span.del - diff pre-image lines
+
+ span.head - diff header (metainformation)
+
+ span.hunk - diff hunk-header
+
+EOF
+}
sub _default_text ($$$) {
my ($ctx, $key, $txt) = @_;
+ return _colors_help($ctx, $txt) if $key eq 'color';
return if $key ne 'help'; # TODO more keys?
my $ibx = $ctx->{-inbox};
}
} else {
require PublicInbox::WWW;
- PublicInbox::WWW->preload;
my $www = PublicInbox::WWW->new;
+ $www->preload;
$app = builder {
eval {
enable 'Deflater',
my $config = eval { PublicInbox::Config->new } || eval {
warn "public-inbox unconfigured for serving, indexing anyways...\n";
- {}
+ undef;
};
eval { require PublicInbox::SearchIdx };
if ($@) {
sub usage { print STDERR "Usage: $usage\n"; exit 1 }
usage() unless @dirs;
-$config->each_inbox(sub {
+defined($config) and $config->each_inbox(sub {
my ($ibx) = @_;
for my $i (0..$#dirs) {
#!/usr/bin/perl -w
-# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Parallel WWW checker
my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n";
use Time::HiRes qw(gettimeofday tv_interval);
use WWW::Mechanize;
use Data::Dumper;
+
+# we want to use vfork+exec with spawn, WWW::Mechanize can use too much
+# memory and fork(2) fails
+use PublicInbox::Spawn qw(spawn which);
+$ENV{PERL_INLINE_DIRECTORY} or warn "PERL_INLINE_DIRECTORY unset, may OOM\n";
+
+our $tmp_owner = $$;
my $nproc = 4;
my $slow = 0.5;
my %opts = (
GetOptions(%opts) or die "bad command-line args\n$usage";
my $root_url = shift or die $usage;
+chomp(my $xmlstarlet = which('xmlstarlet'));
+my $atom_check = eval {
+ my $cmd = [ qw(xmlstarlet val -e -) ];
+ sub {
+ my ($in, $out, $err) = @_;
+ use autodie;
+ open my $in_fh, '+>', undef;
+ open my $out_fh, '+>', undef;
+ open my $err_fh, '+>', undef;
+ print $in_fh $$in;
+ $in_fh->flush;
+ sysseek($in_fh, 0, 0);
+ my $rdr = {
+ 0 => fileno($in_fh),
+ 1 => fileno($out_fh),
+ 2 => fileno($err_fh),
+ };
+ my $pid = spawn($cmd, undef, $rdr);
+ defined $pid or die "spawn failure: $!";
+ while (waitpid($pid, 0) != $pid) {
+ next if $!{EINTR};
+ warn "waitpid(xmlstarlet, $pid) $!";
+ return $!;
+ }
+ sysseek($out_fh, 0, 0);
+ sysread($out_fh, $$out, -s $out_fh);
+ sysseek($err_fh, 0, 0);
+ sysread($err_fh, $$err, -s $err_fh);
+ $?
+ }
+} if $xmlstarlet;
+
my %workers;
+$SIG{INT} = sub { exit 130 };
$SIG{TERM} = sub { exit 0 };
$SIG{CHLD} = sub {
while (1) {
sub worker_loop {
my ($todo_rd, $done_wr) = @_;
+ $SIG{CHLD} = 'DEFAULT';
my $m = WWW::Mechanize->new(autocheck => 0);
my $cc = LWP::ConnCache->new;
+ $m->stack_depth(0); # no history
$m->conn_cache($cc);
while (1) {
$todo_rd->recv(my $u, 65535, 0);
my $s;
# blocking
foreach my $l (@links, "DONE\t$u") {
- next if $l eq '';
+ next if $l eq '' || $l =~ /\.mbox(?:\.gz)\z/;
do {
$s = $done_wr->send($l, MSG_EOR);
} while (!defined $s && $!{EINTR});
# make sure the HTML source doesn't screw up terminals
# when people curl the source (not remotely an expert
# on languages or encodings, here).
- next if $r->header('Content-Type') !~ m!\btext/html\b!;
+ my $ct = $r->header('Content-Type') || '';
+ warn "no Content-Type: $u\n" if $ct eq '';
+
+ if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) {
+ my $raw = $r->decoded_content;
+ my ($out, $err) = ('', '');
+ my $fail = $atom_check->(\$raw, \$out, \$err);
+ warn "Atom ($fail) - $u - <1:$out> <2:$err>\n" if $fail;
+ }
+
+ next if $ct !~ m!\btext/html\b!;
my $dc = $r->decoded_content;
if ($dc =~ /([\x00-\x08\x0d-\x1f\x7f-\x{99999999}]+)/s) {
my $o = $1;
}, 'known addresses populated');
}
+my @invalid = (
+ # git rejects this because it locks refnames, but we don't have
+ # this problem with inbox names:
+ # 'inbox.lock',
+
+ # git rejects these:
+ '', '..', '.', 'stash@{9}', 'inbox.', '^caret', '~tilde',
+ '*asterisk', 's p a c e s', ' leading-space', 'trailing-space ',
+ 'question?', 'colon:', '[square-brace]', "\fformfeed",
+ "\0zero", "\bbackspace",
+
+);
+
+require Data::Dumper;
+for my $s (@invalid) {
+ my $d = Data::Dumper->new([$s])->Terse(1)->Indent(0)->Dump;
+ ok(!PublicInbox::Config::valid_inbox_name($s), "$d name rejected");
+}
+
+# obviously-valid examples
+my @valid = qw(a a@example a@example.com);
+
+# Rejecting more was considered, but then it dawned on me that
+# people may intentionally use inbox names which are not URL-friendly
+# to prevent the PSGI interface from displaying them...
+# URL-unfriendly
+# '<', '>', '%', '#', '?', '&', '(', ')',
+
+# maybe these aren't so bad, they're common in Message-IDs, even:
+# '!', '$', '=', '+'
+push @valid, qw[bang! ca$h less< more> 1% (parens) &more eql= +plus], '#hash';
+for my $s (@valid) {
+ my $d = Data::Dumper->new([$s])->Terse(1)->Indent(0)->Dump;
+ ok(PublicInbox::Config::valid_inbox_name($s), "$d name accepted");
+}
+
+{
+ my $f = "$tmpdir/ordered";
+ open my $fh, '>', $f or die "open: $!";
+ my @expect;
+ foreach my $i (0..3) {
+ push @expect, "$i";
+ print $fh <<"" or die "print: $!";
+[publicinbox "$i"]
+ mainrepo = /path/to/$i.git
+ address = $i\@example.com
+
+ }
+ close $fh or die "close: $!";
+ my $cfg = PublicInbox::Config->new($f);
+ my @result;
+ $cfg->each_inbox(sub { push @result, $_[0]->{name} });
+ is_deeply(\@result, \@expect);
+}
+
+{
+ my $pfx1 = "publicinbox.test1";
+ my $pfx2 = "publicinbox.test2";
+ my $h = {
+ "$pfx1.address" => 'test@example.com',
+ "$pfx1.mainrepo" => '/path/to/non/existent',
+ "$pfx2.address" => 'foo@example.com',
+ "$pfx2.mainrepo" => '/path/to/foo',
+ "$pfx1.coderepo" => 'project',
+ "$pfx2.coderepo" => 'project',
+ "coderepo.project.dir" => '/path/to/project.git',
+ };
+ my $cfg = PublicInbox::Config->new($h);
+ my $t1 = $cfg->lookup_name('test1');
+ my $t2 = $cfg->lookup_name('test2');
+ is($t1->{-repo_objs}->[0], $t2->{-repo_objs}->[0],
+ 'inboxes share ::Git object');
+}
+
done_testing();
my $nl = scalar @ref;
ok($nl > 1, "qx returned array length of $nl");
- $gcf->qx(qw(repack -adbq));
+ $gcf->qx(qw(repack -adq));
ok($gcf->packed_bytes > 0, 'packed size is positive');
}
open $fh, '<', "$alt/config" or die "open failed: $!\n";
my $config = eval { local $/; <$fh> };
is($$found, $config, 'alternates reloaded');
+
+ ok($gcf->cleanup(time - 30), 'cleanup did not expire');
+ ok(!$gcf->cleanup(time + 30), 'cleanup can expire');
+ ok(!$gcf->cleanup, 'cleanup idempotent');
}
+use_ok 'PublicInbox::Git', qw(git_unquote git_quote);
+my $s;
+is("foo\nbar", git_unquote($s = '"foo\\nbar"'), 'unquoted newline');
+is("Eléanor", git_unquote($s = '"El\\303\\251anor"'), 'unquoted octal');
+is(git_unquote($s = '"I\"m"'), 'I"m', 'unquoted dq');
+is(git_unquote($s = '"I\\m"'), 'I\\m', 'unquoted backslash');
+
+is(git_quote($s = "Eléanor"), '"El\\303\\251anor"', 'quoted octal');
+is(git_quote($s = "hello\"world"), '"hello\"world"', 'quoted dq');
+is(git_quote($s = "hello\\world"), '"hello\\\\world"', 'quoted backslash');
+is(git_quote($s = "hello\nworld"), '"hello\\nworld"', 'quoted LF');
+
done_testing();
--- /dev/null
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+eval { require highlight } or
+ plan skip_all => 'failed to load highlight.pm';
+use_ok 'PublicInbox::HlMod';
+my $hls = PublicInbox::HlMod->new;
+ok($hls, 'initialized OK');
+is($hls->_shebang2lang(\"#!/usr/bin/perl -w\n"), 'perl', 'perl shebang OK');
+is($hls->{-ext2lang}->{'pm'}, 'perl', '.pm suffix OK');
+is($hls->{-ext2lang}->{'pl'}, 'perl', '.pl suffix OK');
+is($hls->_path2lang('Makefile'), 'make', 'Makefile OK');
+my $str = do { local $/; open(my $fh, __FILE__); <$fh> };
+my $orig = $str;
+
+{
+ my $ref = $hls->do_hl(\$str, 'foo.perl');
+ is(ref($ref), 'SCALAR', 'got a scalar reference back');
+ like($$ref, qr/I can see you!/, 'we can see ourselves in output');
+ like($$ref, qr/&&/, 'escaped');
+
+ use PublicInbox::Spawn qw(which);
+ if (eval { require IPC::Run } && which('w3m')) {
+ require File::Temp;
+ my $cmd = [ qw(w3m -T text/html -dump -config /dev/null) ];
+ my ($out, $err) = ('', '');
+ IPC::Run::run($cmd, \('<pre>'.$$ref.'</pre>'), \$out, \$err);
+ # expand tabs and normalize whitespace,
+ # w3m doesn't preserve tabs
+ $orig =~ s/\t/ /gs;
+ $out =~ s/\s*\z//sg;
+ $orig =~ s/\s*\z//sg;
+ is($out, $orig, 'w3m output matches');
+ }
+}
+
+my $nr = $ENV{TEST_MEMLEAK};
+if ($nr && -r "/proc/$$/status") {
+ my $fh;
+ open $fh, '<', "/proc/$$/status";
+ diag "starting at memtest at ".join('', grep(/VmRSS:/, <$fh>));
+ PublicInbox::HlMod->new->do_hl(\$orig) for (1..$nr);
+ open $fh, '<', "/proc/$$/status";
+ diag "creating $nr instances: ".join('', grep(/VmRSS:/, <$fh>));
+ my $hls = PublicInbox::HlMod->new;
+ $hls->do_hl(\$orig) for (1..$nr);
+ $hls = undef;
+ open $fh, '<', "/proc/$$/status";
+ diag "reused instance $nr times: ".join('', grep(/VmRSS:/, <$fh>));
+}
+
+done_testing;
is('foo.bar', PublicInbox::Hval::to_filename("foo....bar"),
'to_filename squeezes -');
+my $s = "\0\x07\n";
+PublicInbox::Hval::src_escape($s);
+is($s, "\\0\\a\n", 'src_escape works as intended');
done_testing();
$mime->header_set('Message-ID', "<hello\tworld>");
is_deeply(mids($mime->header_obj), ['helloworld'],
'drop \t in Message-ID');
+
+ $mime->header_set('To', 'u@example.com');
+ $mime->header_set('References', '<hello> <world> <n> <u@example.com>');
+ is_deeply(references($mime->header_obj), [qw(hello world)]);
}
done_testing();
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Benchmark qw(:all);
+use PublicInbox::Inbox;
+use PublicInbox::View;
+require './t/common.perl';
+
+my @cat = qw(cat-file --buffer --batch-check --batch-all-objects);
+if (require_git(2.19, 1)) {
+ push @cat, '--unordered';
+} else {
+ warn
+"git <2.19, cat-file lacks --unordered, locality suffers\n";
+}
+
+my $pi_dir = $ENV{GIANT_PI_DIR};
+plan skip_all => "GIANT_PI_DIR not defined for $0" unless $pi_dir;
+
+my $ibx = PublicInbox::Inbox->new({ mainrepo => $pi_dir, name => 'name' });
+my $git = $ibx->git;
+my $fh = $git->popen(@cat);
+my $vec = '';
+vec($vec, fileno($fh), 1) = 1;
+select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
+
+my $ctx = {
+ env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
+ -inbox => $ibx,
+};
+my ($str, $mime, $res, $cmt, $type);
+my $n = 0;
+my $t = timeit(1, sub {
+ while (<$fh>) {
+ ($cmt, $type) = split / /;
+ next if $type ne 'blob';
+ ++$n;
+ $str = $git->cat_file($cmt);
+ $mime = PublicInbox::MIME->new($str);
+ $res = PublicInbox::View::msg_html($ctx, $mime);
+ $res = $res->[2];
+ while (defined($res->getline)) {}
+ $res->close;
+ }
+});
+diag 'msg_html took '.timestr($t)." for $n messages";
+ok 1;
+done_testing();
-# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use Test::More;
use_ok 'PublicInbox::Qspawn';
+{
+ my $cmd = [qw(sh -c), 'echo >&2 err; echo out'];
+ my $qsp = PublicInbox::Qspawn->new($cmd, {}, { 2 => 1 });
+ my $res;
+ $qsp->psgi_qx({}, undef, sub { $res = ${$_[0]} });
+ is($res, "err\nout\n", 'captured stderr and stdout');
+}
+
my $limiter = PublicInbox::Qspawn::Limiter->new(1);
{
my $x = PublicInbox::Qspawn->new([qw(true)]);
my ($rpipe) = @_;
is(0, sysread($rpipe, my $buf, 1), 'read zero bytes from false');
my $err = $x->finish;
- is($err, 256, 'error on finish');
+ ok($err, 'error on finish');
$run = 1;
});
is($run, 1, 'callback ran alright');
--- /dev/null
+From: WEB DESIGN EXPERT <BOFH@YHBT.net>
+To: meta@public-inbox.org
+Subject: [PATCH] TODO: take expert web design advice
+Date: Mon, 1 Apr 2019 08:15:20 +0000
+Message-Id: <20190401081523.16213-1-BOFH@YHBT.net>
+
+---
+ TODO | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/TODO b/TODO
+index 605013e..69df7d5 100644
+--- a/TODO
++++ b/TODO
+@@ -109,3 +109,5 @@ all need to be considered for everything we introduce)
+
+ * Optional history squashing to reduce commit and intermediate
+ tree objects
++
++ * Make use of <blink> and <marquee> tags
--- /dev/null
+From: POLITICAL CORRECTNESS EXPERT <BOFH@YHBT.net>
+To: meta@public-inbox.org
+Subject: [PATCH] POLITICALLY CORRECT FILE NAMING
+Date: Mon, 1 Apr 2019 08:15:20 +0000
+Message-Id: <20190401081523.16213-2-BOFH@YHBT.net>
+
+HACKING MIGHT GET US REPORTED TO EFF-BEE-EYE
+AND USE MARKDOWN CUZ MOAR FLAVORS == BETTER
+---
+ HACKING => CONTRIBUTING.md | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+ rename HACKING => CONTRIBUTING.md (94%)
+
+diff --git a/HACKING b/CONTRIBUTING.md
+similarity index 94%
+rename from HACKING
+rename to CONTRIBUTING.md
+index 3435775..0a92431 100644
+--- a/HACKING
++++ b/CONTRIBUTING.md
+@@ -1,5 +1,5 @@
+-hacking public-inbox
+---------------------
++contributing to public-inbox
++----------------------------
+
+ Send all patches and "git request-pull"-formatted emails to our
+ self-hosting inbox at meta@public-inbox.org
+@@ -15,7 +15,7 @@ Please consider our goals in mind:
+ Decentralization, Accessibility, Compatibility, Performance
+
+ These goals apply to everyone: users viewing over the web or NNTP,
+-sysadmins running public-inbox, and other hackers working public-inbox.
++sysadmins running public-inbox, and other contributors working public-inbox.
+
+ We will reject any feature which advocates or contributes to any
+ particular instance of a public-inbox becoming a single point of failure.
--- /dev/null
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw(tempdir);
+use Cwd qw(abs_path);
+require './t/common.perl';
+require_git(2.6);
+
+my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
+ URI::Escape Plack::Builder);
+foreach my $mod (@mods) {
+ eval "require $mod";
+ plan skip_all => "$mod missing for $0" if $@;
+}
+chomp(my $git_dir = `git rev-parse --git-dir 2>/dev/null`);
+plan skip_all => "$0 must be run from a git working tree" if $?;
+$git_dir = abs_path($git_dir);
+
+use_ok "PublicInbox::$_" for (qw(Inbox V2Writable MIME Git SolverGit));
+
+my $mainrepo = tempdir('pi-solver-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $opts = {
+ mainrepo => $mainrepo,
+ name => 'test-v2writable',
+ version => 2,
+ -primary_address => 'test@example.com',
+};
+my $ibx = PublicInbox::Inbox->new($opts);
+my $im = PublicInbox::V2Writable->new($ibx, 1);
+$im->{parallel} = 0;
+
+sub deliver_patch ($) {
+ open my $fh, '<', $_[0] or die "open: $!";
+ my $mime = PublicInbox::MIME->new(do { local $/; <$fh> });
+ $im->add($mime);
+ $im->done;
+}
+
+deliver_patch('t/solve/0001-simple-mod.patch');
+
+$ibx->{-repo_objs} = [ PublicInbox::Git->new($git_dir) ];
+my $res;
+my $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] });
+open my $log, '+>>', "$mainrepo/solve.log" or die "open: $!";
+my $psgi_env = { 'psgi.errors' => *STDERR };
+$solver->solve($psgi_env, $log, '69df7d5', {});
+ok($res, 'solved a blob!');
+my $wt_git = $res->[0];
+is(ref($wt_git), 'PublicInbox::Git', 'got a git object for the blob');
+my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0';
+is($res->[1], $expect, 'resolved blob to unabbreviated identifier');
+is($res->[2], 'blob', 'type specified');
+is($res->[3], 4405, 'size returned');
+
+is(ref($wt_git->cat_file($res->[1])), 'SCALAR', 'wt cat-file works');
+is_deeply([$expect, 'blob', 4405],
+ [$wt_git->check($res->[1])], 'wt check works');
+
+if (0) { # TODO: check this?
+ seek($log, 0, 0);
+ my $z = do { local $/; <$log> };
+ diag $z;
+}
+
+my $oid = $expect;
+for my $i (1..2) {
+ my $more;
+ my $s = PublicInbox::SolverGit->new($ibx, sub { $more = $_[0] });
+ $s->solve($psgi_env, $log, $oid, {});
+ is($more->[1], $expect, 'resolved blob to long OID '.$i);
+ chop($oid);
+}
+
+$solver = undef;
+$res = undef;
+my $wt_git_dir = $wt_git->{git_dir};
+$wt_git = undef;
+ok(!-d $wt_git_dir, 'no references to WT held');
+
+$solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] });
+$solver->solve($psgi_env, $log, '0'x40, {});
+is($res, undef, 'no error on z40');
+
+my $git_v2_20_1_tag = '7a95a1cd084cb665c5c2586a415e42df0213af74';
+$solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] });
+$solver->solve($psgi_env, $log, $git_v2_20_1_tag, {});
+is($res, undef, 'no error on a tag not in our repo');
+
+deliver_patch('t/solve/0002-rename-with-modifications.patch');
+$solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] });
+$solver->solve($psgi_env, $log, '0a92431', {});
+ok($res, 'resolved without hints');
+
+my $hints = {
+ oid_a => '3435775',
+ path_a => 'HACKING',
+ path_b => 'CONTRIBUTING'
+};
+$solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] });
+$solver->solve($psgi_env, $log, '0a92431', $hints);
+my $hinted = $res;
+# don't compare ::Git objects:
+shift @$res; shift @$hinted;
+is_deeply($res, $hinted, 'hints work (or did not hurt :P');
+
+done_testing();
use Email::MIME;
use Plack::Util;
use_ok 'PublicInbox::View';
+use_ok 'PublicInbox::Config';
# FIXME: make this test less fragile
my $ctx = {
nntp_url => sub {[]},
max_git_part => sub { undef },
description => sub { '' }),
+ www => Plack::Util::inline_object(style => sub { '' }),
};
$ctx->{-inbox}->{-primary_address} = 'test@example.com';