From: Eric Wong Date: Tue, 21 May 2019 23:22:05 +0000 (+0000) Subject: Merge remote-tracking branch 'origin/xap-optional' into master X-Git-Tag: v1.2.0~269 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=e220b8b2ee5cfd458167dc2c6c92726352c4c80e;hp=d0e8bfd866ed1e924e8d9f551939eecbea4920ef Merge remote-tracking branch 'origin/xap-optional' into master * origin/xap-optional: admin: improve warnings and errors for missing modules searchidx: do not create empty Xapian partitions for basic lazy load Xapian and make it optional for v2 www: use Inbox->over where appropriate nntp: use Inbox->over directly inbox: add ->over method to ease access --- diff --git a/Documentation/include.mk b/Documentation/include.mk index 2a02611a..6415338b 100644 --- a/Documentation/include.mk +++ b/Documentation/include.mk @@ -10,6 +10,7 @@ RSYNC = rsync RSYNC_DEST = public-inbox.org:/srv/public-inbox/ txt := INSTALL README COPYING TODO HACKING dtxt := design_notes.txt design_www.txt dc-dlvr-spam-flow.txt hosted.txt +dtxt += marketing.txt dtxt += standards.txt dtxt := $(addprefix Documentation/, $(dtxt)) docs := $(txt) $(dtxt) diff --git a/Documentation/marketing.txt b/Documentation/marketing.txt new file mode 100644 index 00000000..385e5172 --- /dev/null +++ b/Documentation/marketing.txt @@ -0,0 +1,27 @@ +marketing guide for public-inbox + +TL; DR: Don't market this. + +If you must: don't be pushy and annoying about it. Slow down. +Please no superlatives, hype or BS. + +It's online and public, so it already markets itself. +Being informative is not a bad thing, being insistent is. + +Chances are, you're preaching to the choir; or the folks you're +trying to convince are not ready for everything our project +represents to the resistance against centralization. + +Baby steps... + +There's never a need for anybody to migrate to using our +software, or to use any particular instance of it. 
It's +designed to coexist with other mail archives, especially +other installations of public-inbox. + +Most importantly, we take victories even when our software +doesn't get adopted. Freedom from lock-in is more important +than the adoption of any software. + +Every time somebody recognizes and rejects various forms of +lock-in and centralization, it is already a victory for us. diff --git a/INSTALL b/INSTALL index 72e0763c..0246299b 100644 --- a/INSTALL +++ b/INSTALL @@ -36,11 +36,6 @@ Beyond that, there is a long list of Perl modules required, starting with: pkg: p5-TimeDate rpm: perl-TimeDate -* Devel::Peek deb: libperl5.$MINOR (e.g. libperl5.24) - pkg: perl5 - rpm: perl-Devel-Peek - (typically installed alongside Perl5) - * Email::MIME deb: libemail-mime-perl pkg: p5-Email-MIME rpm: perl-Email-MIME @@ -125,6 +120,12 @@ above, so there is no need to explicitly install them: rpm: perl-DBI (pulled in by DBD::SQLite) +* Devel::Peek deb: libperl5.$MINOR (e.g. libperl5.24) + pkg: perl5 + rpm: perl-Devel-Peek + (optional for stale FD cleanup in daemons, + typically installed alongside Perl5) + - Filesys::Notify::Simple deb: libfilesys-notify-simple-perl pkg: pkg-Filesys-Notify-Simple rpm: perl-Filesys-Notify-Simple diff --git a/MANIFEST b/MANIFEST index 1da40a90..2c356c62 100644 --- a/MANIFEST +++ b/MANIFEST @@ -7,6 +7,7 @@ Documentation/design_notes.txt Documentation/design_www.txt Documentation/hosted.txt Documentation/include.mk +Documentation/marketing.txt Documentation/public-inbox-compact.pod Documentation/public-inbox-config.pod Documentation/public-inbox-convert.pod diff --git a/Makefile.PL b/Makefile.PL index 6be913b1..de0c49fd 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -38,10 +38,6 @@ WriteMakefile( # `perl5' on FreeBSD 'Encode' => 0, - # libperl$PERL_VERSION on Debian, `perl5' on FreeBSD, - # but Fedora seems to need this separately - 'Devel::Peek' => 0, - # TODO: these should really be made optional... 
'Plack' => 0, 'URI::Escape' => 0, diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index 68ba9876..227ba5f9 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -13,6 +13,7 @@ use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); STDOUT->autoflush(1); STDERR->autoflush(1); require PublicInbox::DS; +require PublicInbox::EvCleanup; require POSIX; require PublicInbox::Listener; require PublicInbox::ParentPipe; @@ -463,6 +464,7 @@ sub master_loop { sub daemon_loop ($$) { my ($refresh, $post_accept) = @_; + PublicInbox::EvCleanup::enable(); # early for $refresh my $parent_pipe; if ($worker_processes > 0) { $refresh->(); # preload by default @@ -485,7 +487,6 @@ sub daemon_loop ($$) { @listeners = map { PublicInbox::Listener->new($_, $post_accept) } @listeners; - PublicInbox::EvCleanup::enable(); PublicInbox::DS->EventLoop; $parent_pipe = undef; } diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 11bd241e..10e6d6a4 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -18,6 +18,7 @@ use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl use HTTP::Status qw(status_message); use HTTP::Date qw(time2str); use IO::Handle; +require PublicInbox::EvCleanup; use constant { CHUNK_START => -1, # [a-f0-9]+\r\n CHUNK_END => -2, # \r\n diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 12abf399..81a38fb6 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -367,10 +367,14 @@ sub add { my @ct = msg_timestamp($hdr); my $author_time_raw = git_timestamp(@at); my $commit_time_raw = git_timestamp(@ct); + my $subject = $mime->header('Subject'); $subject = '(no subject)' unless defined $subject; - my $path_type = $self->{path_type}; + # Mime decoding can create nulls replace them with spaces to protect git + $subject =~ tr/\0/ /; + utf8::encode($subject); + my $path_type = $self->{path_type}; my $path; if ($path_type eq '2/38') { $path = mid2path(v1_mid0($mime)); @@ -411,9 
+415,6 @@ sub add { print $w "reset $ref\n" or wfail; } - # Mime decoding can create nulls replace them with spaces to protect git - $subject =~ tr/\0/ /; - utf8::encode($subject); print $w "commit $ref\nmark :$commit\n", "author $name <$email> $author_time_raw\n", "committer $self->{ident} $commit_time_raw\n" or wfail; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 813ed997..0d86771f 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -7,26 +7,30 @@ use strict; use warnings; use PublicInbox::Git; use PublicInbox::MID qw(mid2path); -use Devel::Peek qw(SvREFCNT); use PublicInbox::MIME; -use POSIX qw(strftime); +# Long-running "git-cat-file --batch" processes won't notice +# unlinked packs, so we need to restart those processes occasionally. +# Xapian and SQLite file handles are mostly stable, but sometimes an +# admin will attempt to replace them atomically after compact/vacuum +# and we need to be prepared for that. my $cleanup_timer; -eval { - $cleanup_timer = 'disabled'; - require PublicInbox::EvCleanup; - $cleanup_timer = undef; # OK if we get here -}; -my $cleanup_broken = $@; - +my $cleanup_avail = -1; # 0, or 1 +my $have_devel_peek; my $CLEANUP = {}; # string(inbox) -> inbox sub cleanup_task () { $cleanup_timer = undef; my $next = {}; for my $ibx (values %$CLEANUP) { my $again; - foreach my $f (qw(mm search over)) { - delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1; + if ($have_devel_peek) { + foreach my $f (qw(mm search over)) { + # we bump refcnt by assigning tmp, here: + my $tmp = $ibx->{$f} or next; + next if Devel::Peek::SvREFCNT($tmp) > 2; + delete $ibx->{$f}; + # refcnt is zero when tmp is out-of-scope + } } my $expire = time - 60; if (my $git = $ibx->{git}) { @@ -37,16 +41,31 @@ sub cleanup_task () { $again = 1 if $git->cleanup($expire); } } - $again ||= !!($ibx->{over} || $ibx->{mm} || $ibx->{search}); + if ($have_devel_peek) { + $again ||= !!($ibx->{over} || $ibx->{mm} || + $ibx->{search}); + } 
$next->{"$ibx"} = $ibx if $again; } $CLEANUP = $next; } +sub cleanup_possible () { + # no need to require EvCleanup, here, if it were enabled another + # module would've require'd it, already + eval { PublicInbox::EvCleanup::enabled() } or return 0; + + eval { + require Devel::Peek; # needs separate package in Fedora + $have_devel_peek = 1; + }; + 1; +} + sub _cleanup_later ($) { my ($self) = @_; - return if $cleanup_broken; - return unless PublicInbox::EvCleanup::enabled(); + $cleanup_avail = cleanup_possible() if $cleanup_avail < 0; + return if $cleanup_avail != 1; $cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task); $CLEANUP->{"$self"} = $self; } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b1e62f4c..eae10d8e 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -24,6 +24,10 @@ sub load_xapian () { # n.b. FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector + # FLAG_PHRASE also seems to cause performance problems + # sometimes. + # TODO: make this an option, maybe? + # or make indexlevel=medium as default FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD(); }; }; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index f96f0d03..114420e4 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -19,7 +19,6 @@ use POSIX qw(strftime); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn); use PublicInbox::Git qw(git_unquote); -use Compress::Zlib qw(compress); use constant { BATCH_BYTES => defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ? 
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 8e1b1afe..b6f18f8d 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -59,14 +59,14 @@ sub call { my $ctx = { env => $env, www => $self }; # we don't care about multi-value - my %qp = map { + %{$ctx->{qp}} = map { utf8::decode($_); - my ($k, $v) = split('=', uri_unescape($_), 2); - $v = '' unless defined $v; - $v =~ tr/+/ /; - ($k, $v) + tr/+/ /; + my ($k, $v) = split('=', $_, 2); + $v = uri_unescape($v // ''); + # none of the keys we care about will need escaping + $k => $v; } split(/[&;]+/, $env->{QUERY_STRING}); - $ctx->{qp} = \%qp; # avoiding $env->{PATH_INFO} here since that's already decoded my ($path_info) = ($env->{REQUEST_URI} =~ path_re($env)); diff --git a/t/config_limiter.t b/t/config_limiter.t index 85a71257..b18951a6 100644 --- a/t/config_limiter.t +++ b/t/config_limiter.t @@ -38,7 +38,6 @@ my $cfgpfx = "publicinbox.test"; ok($lim, 'Limiter exists'); is($lim->{max}, 3, 'limiter has expected slots'); $ibx->{git} = undef; - PublicInbox::Inbox::cleanup_task; my $new = $ibx->git; isnt($old, "$new", 'got new Git object'); is("$new->{-httpbackend_limiter}", "$lim", 'same limiter');