From: Eric Wong Date: Sun, 5 May 2019 23:28:02 +0000 (+0000) Subject: Merge remote-tracking branch 'origin/wwwlisting' X-Git-Tag: v1.2.0~301 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=e1ed79d1e99e2c58b3edb370e60904cf656dd823;hp=7a3946ef122e8218c6ce3355d7f968562212d53b Merge remote-tracking branch 'origin/wwwlisting' * origin/wwwlisting: www: support listing of inboxes start depending on Perl 5.10.1+ --- diff --git a/Documentation/.gitignore b/Documentation/.gitignore index 107ad36f..e78a0d33 100644 --- a/Documentation/.gitignore +++ b/Documentation/.gitignore @@ -1 +1,2 @@ /public-inbox-*.txt +/standards.txt diff --git a/Documentation/include.mk b/Documentation/include.mk index 28fa7574..02cbef30 100644 --- a/Documentation/include.mk +++ b/Documentation/include.mk @@ -81,8 +81,12 @@ txt2pre = $(PERL) -I lib ./Documentation/txt2pre <$< >$@+ && \ touch -r $< $@+ && mv $@+ $@ txt := INSTALL README COPYING TODO dtxt := design_notes.txt design_www.txt dc-dlvr-spam-flow.txt hosted.txt +dtxt += standards.txt dtxt := $(addprefix Documentation/, $(dtxt)) $(mantxt) +Documentation/standards.txt : Documentation/standards.perl + $(PERL) $< >$@+ && mv $@+ $@ + %.html: %.txt TITLE="$(basename $($@+ touch -r $< $@+ diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index 17b8bac7..d44c8f30 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -225,6 +225,15 @@ directive is configured. Default: /var/www/htdocs/cgit/cgit.cgi or /usr/lib/cgit/cgit.cgi +=item publicinbox.cgitdata + +A path to the data directory used by cgit for storing static files. +Typically guessed based the location of C (from +C, but may be overridden. + +Default: basename of C, /var/www/htdocs/cgit/ +or /usr/share/cgit/ + =item publicinbox.wwwlisting Enable a HTML listing style when the root path of the URL '/' is accessed. 
diff --git a/Documentation/standards.perl b/Documentation/standards.perl new file mode 100755 index 00000000..f75c4122 --- /dev/null +++ b/Documentation/standards.perl @@ -0,0 +1,77 @@ +#!/usr/bin/perl -w +use strict; +# Copyright 2019 all contributors +# License: AGPL-3.0+ + +print < 'NNTP', + 977 => 'NNTP (old)', + 6048 => 'NNTP additions to LIST command (TODO)', + 8054 => 'NNTP compression (TODO)', + 4642 => 'NNTP TLS (TODO)', + 8143 => 'NNTP TLS (TODO)', + 2980 => 'NNTP extensions (obsolete, but NOT irrelevant)', + 4287 => 'Atom syndication', + 4685 => 'Atom threading extensions', + 2919 => 'List-Id mail header', + 5064 => 'Archived-At mail header', + 3986 => 'URI escaping', + 1521 => 'MIME extensions', + 2616 => 'HTTP/1.1 (newer updates should apply, too)', + 7230 => 'HTTP/1.1 message syntax and routing', + 7231 => 'HTTP/1.1 semantics and content', + 2822 => 'Internet message format', + # TODO: flesh this out + +]; + +my @rfc_urls = qw(tools.ietf.org/html/rfc%d + www.rfc-editor.org/errata_search.php?rfc=%d); + +for (my $i = 0; $i < $#$rfcs;) { + my $num = $rfcs->[$i++]; + my $txt = $rfcs->[$i++]; + print "rfc$num\t- $txt\n"; + + printf "\thttps://$_\n", $num foreach @rfc_urls; + print "\n"; +} + +print <<'EOF' +Other relevant documentation +---------------------------- + +* Documentation/technical/http-protocol.txt in git source code: + https://public-inbox.org/git/9c5b6f0fac/s + +* Various mbox formats (we currently emit and parse mboxrd) + https://en.wikipedia.org/wiki/Mbox + +* PSGI/Plack specifications (as long as our web frontend uses Perl5) + git clone https://github.com/plack/psgi-specs.git + +Copyright +--------- + +Copyright 2019 all contributors +License: AGPL-3.0+ +EOF diff --git a/MANIFEST b/MANIFEST index 881d2f07..4bdcda3c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -19,6 +19,7 @@ Documentation/public-inbox-overview.pod Documentation/public-inbox-v1-format.pod Documentation/public-inbox-v2-format.pod Documentation/public-inbox-watch.pod 
+Documentation/standards.perl Documentation/txt2pre HACKING INSTALL @@ -38,6 +39,7 @@ examples/apache2_perl.conf examples/apache2_perl_old.conf examples/cgi-webrick.rb examples/cgit-commit-filter.lua +examples/cgit-wwwhighlight-filter.lua examples/cgit.psgi examples/highlight.psgi examples/logrotate.conf diff --git a/examples/cgit-commit-filter.lua b/examples/cgit-commit-filter.lua index 7799befa..16772534 100644 --- a/examples/cgit-commit-filter.lua +++ b/examples/cgit-commit-filter.lua @@ -13,14 +13,19 @@ local urls = {} urls['public-inbox.git'] = 'https://public-inbox.org/meta/' -- additional URLs here... +-- TODO we should be able to auto-generate this based on "coderepo" +-- directives in the public-inbox config file; but keep in mind +-- the mapping is M:N between inboxes and coderepos function filter_open(...) lineno = 0 buffer = "" - subject = "" end function filter_close() + -- cgit opens and closes this filter for the commit subject + -- and body separately, and we only generate the link based + -- on the commit subject: if lineno == 1 and string.find(buffer, "\n") == nil then u = urls[os.getenv('CGIT_REPO_URL')] if u == nil then @@ -33,6 +38,9 @@ function filter_close() html('') end else + -- pass the body-through as-is + -- TODO: optionally use WwwHighlight for linkification like + -- cgit-wwwhighlight-filter.lua html(buffer) end return 0 diff --git a/examples/cgit-wwwhighlight-filter.lua b/examples/cgit-wwwhighlight-filter.lua new file mode 100644 index 00000000..a267d1c8 --- /dev/null +++ b/examples/cgit-wwwhighlight-filter.lua @@ -0,0 +1,105 @@ +-- Copyright (C) 2019 all contributors +-- License: GPL-2.0+ +-- +-- This filter accesses the PublicInbox::WwwHighlight PSGI endpoint +-- (see examples/highlight.psgi) +-- +-- Dependencies: lua-http +-- +-- disclaimer: written by someone who does not know Lua. 
+-- +-- This requires cgit linked with Lua +-- Usage (in your cgitrc(5) config file): +-- +-- source-filter=lua:/path/to/this/script.lua +-- about-filter=lua:/path/to/this/script.lua +-- +local wwwhighlight_url = 'http://127.0.0.1:9090/' +local req_timeout = 10 +local too_big = false + +-- match $PublicInbox::HTTP::MAX_REQUEST_BUFFER +local max_len = 10 * 1024 * 1024 + +-- about-filter needs surrounding
+-- <pre> tags if all we do is
+-- highlight and linkify
+local pre = true
+
+-- Filter entry point: resets the per-invocation state, decides whether
+-- the output needs to be wrapped by us (about-filter) and appends the
+-- filename (first argument, if any) to the highlighter URL so the
+-- endpoint can use it for language detection.
+function filter_open(...)
+	req_body = ""
+	-- start every invocation in buffering mode, in case this Lua
+	-- state is reused after an earlier oversized input
+	too_big = false
+
+	-- detect when we're used in an about-filter
+	local repo_url = os.getenv('CGIT_REPO_URL')
+	if repo_url then
+		-- os.getenv returns nil when PATH_INFO is unset; guard
+		-- against indexing nil and treat it as "not an about page"
+		local path_info = os.getenv('PATH_INFO')
+		local rurl = path_info and path_info:match("^/(.+)/about/?$")
+		pre = rurl == repo_url
+	end
+
+	-- hand filename off for language detection
+	local fn = select(1, ...)
+	if fn then
+		local http_util = require 'http.util'
+		wwwhighlight_url = wwwhighlight_url .. http_util.encodeURI(fn)
+	end
+end
+
+-- try to buffer the entire source in memory so filter_close can send
+-- it to the highlighter in one request; once the input would exceed
+-- max_len we give up on highlighting and pass everything through
+function filter_write(str)
+	if too_big then
+		-- already in pass-through mode; emit via html_txt as
+		-- fail() does for text which was not highlighted
+		html_txt(str)
+	elseif (req_body:len() + str:len()) > max_len then
+		too_big = true
+		-- flush what was buffered so far *before* dropping it,
+		-- otherwise the beginning of the input is silently lost
+		html_txt(req_body)
+		req_body = ""
+		html_txt(str)
+	else
+		req_body = req_body .. str
+	end
+end
+
+function fail(err)
+	io.stderr:write(tostring(err), "\n")
+	if pre then
+		html("
")
+	end
+	html_txt(req_body)
+	if pre then
+		html("
") + end + return 1 +end + +function filter_close() + if too_big then + return 0 + end + local request = require 'http.request' + local req = request.new_from_uri(wwwhighlight_url) + req.headers:upsert(':method', 'PUT') + req:set_body(req_body) + + -- don't wait for 100-Continue message from the PSGI app + req.headers:delete('expect') + + local headers, stream = req:go(req_timeout) + if headers == nil then + return fail(stream) + end + local status = headers:get(':status') + if status ~= '200' then + return fail('status ' .. status) + end + local body, err = stream:get_body_as_string() + if not body and err then + return fail(err) + end + if pre then + html("
")
+	end
+	html(body)
+	if pre then
+		html("
") + end + return 0 +end diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index 8922ec56..353f4162 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -35,7 +35,15 @@ sub locate_cgit ($) { } } unless (defined $cgit_data) { - foreach my $d (qw(/var/www/htdocs/cgit /usr/share/cgit)) { + my @dirs = qw(/var/www/htdocs/cgit /usr/share/cgit); + + # local installs of cgit from source have + # CGIT_SCRIPT_PATH==CGIT_DATA_PATH by default, + # so we can usually infer the cgit_data path from cgit_bin + if (defined($cgit_bin) && $cgit_bin =~ m!\A(.+?)/[^/]+\z!) { + unshift @dirs, $1 if -d $1; + } + foreach my $d (@dirs) { my $f = "$d/cgit.css"; next unless -f $f; $cgit_data = $d; @@ -90,6 +98,7 @@ my @PASS_ENV = qw( sub call { my ($self, $env) = @_; my $path_info = $env->{PATH_INFO}; + my $cgit_data; # handle requests without spawning cgit iff possible: if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!ox) { @@ -97,10 +106,11 @@ sub call { if (my $git = $self->{"\0$nick"}) { return serve($env, $git, $path); } - } elsif ($path_info =~ m!$self->{static}!) { + } elsif ($path_info =~ m!$self->{static}! 
&& + defined($cgit_data = $self->{cgit_data})) { my $f = $1; my $type = Plack::MIME->mime_type($f); - return static_result($env, [], "$self->{cgit_data}$f", $type); + return static_result($env, [], $cgit_data.$f, $type); } my $cgi_env = { PATH_INFO => $path_info }; diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 14d49cc5..d07d5a79 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -8,13 +8,13 @@ package PublicInbox::ExtMsg; use strict; use warnings; -use PublicInbox::Hval; +use PublicInbox::Hval qw/ascii_html/; use PublicInbox::MID qw/mid2path/; use PublicInbox::WwwStream; our $MIN_PARTIAL_LEN = 16; # TODO: user-configurable -our @EXT_URL = ( +our @EXT_URL = map { ascii_html($_) } ( # leading "//" denotes protocol-relative (http:// or https://) '//marc.info/?i=%s', '//www.mail-archive.com/search?l=mid&q=%s', diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 62bdf0a1..47a2046e 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -458,7 +458,8 @@ sub thread_html { $ctx->{prev_level} = 0; $ctx->{root_anchor} = anchor_for($mid); $ctx->{mapping} = {}; - $ctx->{s_nr} = "$nr+ messages in thread"; + $ctx->{s_nr} = ($nr > 1 ? "$nr+ messages" : 'only message') + .' 
in thread'; my $rootset = thread_results($ctx, $msgs); diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index 0cce952d..6b8d9437 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -146,7 +146,7 @@ sub flush_diff ($$$) { if ($s =~ /^---$/) { to_state($dst, $state, DSTATE_STAT); $$dst .= $s; - } elsif ($s =~ /^ /) { + } elsif ($s =~ /^ / || ($s =~ /^$/ && $state >= DSTATE_CTX)) { # works for common cases, but not weird/long filenames if ($state == DSTATE_STAT && $s =~ /^ (.+)( +\| .*\z)/s) { diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm index 01916401..bc349f8a 100644 --- a/lib/PublicInbox/WwwHighlight.pm +++ b/lib/PublicInbox/WwwHighlight.pm @@ -24,6 +24,8 @@ use warnings; use bytes (); # only for bytes::length use HTTP::Status qw(status_message); use parent qw(PublicInbox::HlMod); +use PublicInbox::Linkify qw(); +use PublicInbox::Hval qw(ascii_html); # TODO: support highlight(1) for distros which don't package the # SWIG extension. 
Also, there may be admins who don't want to @@ -64,7 +66,14 @@ sub call { return r(405) if $req_method ne 'PUT'; my $bref = read_in_full($env) or return r(500); - $bref = $self->do_hl($bref, $env->{PATH_INFO}); + my $l = PublicInbox::Linkify->new; + $l->linkify_1($$bref); + if (my $res = $self->do_hl($bref, $env->{PATH_INFO})) { + $bref = $res; + } else { + $$bref = ascii_html($$bref); + } + $l->linkify_2($$bref); my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ]; push @$h, 'Content-Length', bytes::length($$bref); diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index c708c21f..2893138d 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -12,7 +12,6 @@ use warnings; use PublicInbox::Hval qw(ascii_html); use URI; our $TOR_URL = 'https://www.torproject.org/'; -our $TOR2WEB_URL = 'https://www.tor2web.org/'; our $CODE_URL = 'https://public-inbox.org/'; our $PROJECT = 'public-inbox'; @@ -140,10 +139,6 @@ EOF if ($urls =~ m!\b[^:]+://\w+\.onion/!) { $urls .= "\n note: .onion URLs require Tor: "; $urls .= qq[$TOR_URL]; - if ($TOR2WEB_URL) { - $urls .= "\n or Tor2web: "; - $urls .= qq[$TOR2WEB_URL]; - } } '
'.join("\n\n",
 		$desc,
diff --git a/t/search.t b/t/search.t
index 6415a644..35d71473 100644
--- a/t/search.t
+++ b/t/search.t
@@ -430,13 +430,23 @@ $ibx->with_umask(sub {
 	is($ro->lookup_article($art->{num}), undef, 'gone from OVER DB') if defined($art);
 });
 
+my $all_mask = 07777;
+my $dir_mask = 02770;
+
+# FreeBSD does not allow non-root users to set S_ISGID, so
+# git doesn't set it, either (see DIR_HAS_BSD_GROUP_SEMANTICS in git.git)
+if ($^O =~ /freebsd/i) {
+	$all_mask = 0777;
+	$dir_mask = 0770;
+}
+
 foreach my $f ("$git_dir/public-inbox/msgmap.sqlite3",
 		"$git_dir/public-inbox",
 		glob("$git_dir/public-inbox/xapian*/"),
 		glob("$git_dir/public-inbox/xapian*/*")) {
 	my @st = stat($f);
 	my ($bn) = (split(m!/!, $f))[-1];
-	is($st[2] & 07777, -f _ ? 0660 : 02770,
+	is($st[2] & $all_mask, -f _ ? 0660 : $dir_mask,
 		"sharedRepository respected for $bn");
 }