pkg: p5-TimeDate
rpm: perl-TimeDate
-* Devel::Peek deb: libperl5.$MINOR (e.g. libperl5.24)
- pkg: perl5
- rpm: perl-Devel-Peek
- (typically installed alongside Perl5)
-
* Email::MIME deb: libemail-mime-perl
pkg: p5-Email-MIME
rpm: perl-Email-MIME
- Search::Xapian deb: libsearch-xapian-perl
pkg: p5-Search-Xapian
rpm: perl-Search-Xapian
- (for v2, HTTP search)
+ (HTTP search)
- Net::Server deb: libnet-server-perl
pkg: pkg-Net-Server
rpm: perl-DBI
(pulled in by DBD::SQLite)
+* Devel::Peek deb: libperl5.$MINOR (e.g. libperl5.24)
+ pkg: perl5
+ rpm: perl-Devel-Peek
+ (optional for stale FD cleanup in daemons,
+ typically installed alongside Perl5)
+
- Filesys::Notify::Simple deb: libfilesys-notify-simple-perl
pkg: pkg-Filesys-Notify-Simple
rpm: perl-Filesys-Notify-Simple
use warnings;
use PublicInbox::Git;
use PublicInbox::MID qw(mid2path);
-use Devel::Peek qw(SvREFCNT);
use PublicInbox::MIME;
-use POSIX qw(strftime);
+# Long-running "git-cat-file --batch" processes won't notice
+# unlinked packs, so we need to restart those processes occasionally.
+# Xapian and SQLite file handles are mostly stable, but sometimes an
+# admin will attempt to replace them atomically after compact/vacuum
+# and we need to be prepared for that.
my $cleanup_timer;
-eval {
- $cleanup_timer = 'disabled';
- require PublicInbox::EvCleanup;
- $cleanup_timer = undef; # OK if we get here
-};
-my $cleanup_broken = $@;
-
+my $cleanup_avail = -1; # -1: not probed yet, 0: unavailable, 1: available
+my $have_devel_peek;
my $CLEANUP = {}; # string(inbox) -> inbox
sub cleanup_task () {
$cleanup_timer = undef;
my $next = {};
for my $ibx (values %$CLEANUP) {
my $again;
- foreach my $f (qw(mm search over)) {
- delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1;
+ if ($have_devel_peek) {
- foreach my $f (qw(mm search)) {
++ foreach my $f (qw(mm search over)) {
+ # we bump refcnt by assigning tmp, here:
+ my $tmp = $ibx->{$f} or next;
+ next if Devel::Peek::SvREFCNT($tmp) > 2;
+ delete $ibx->{$f};
+ # refcnt is zero when tmp is out-of-scope
+ }
}
my $expire = time - 60;
if (my $git = $ibx->{git}) {
$again = 1 if $git->cleanup($expire);
}
}
- $again ||= !!($ibx->{over} || $ibx->{mm} || $ibx->{search});
+ if ($have_devel_peek) {
- $again ||= !!($ibx->{mm} || $ibx->{search});
++ $again ||= !!($ibx->{over} || $ibx->{mm} ||
++ $ibx->{search});
+ }
$next->{"$ibx"} = $ibx if $again;
}
$CLEANUP = $next;
}
+sub cleanup_possible () {
+ # no need to require EvCleanup here: if it were enabled, another
+ # module would've require'd it already
+ eval { PublicInbox::EvCleanup::enabled() } or return 0;
+
+ eval {
+ require Devel::Peek; # needs separate package in Fedora
+ $have_devel_peek = 1;
+ };
+ 1;
+}
+
sub _cleanup_later ($) {
my ($self) = @_;
- return if $cleanup_broken;
- return unless PublicInbox::EvCleanup::enabled();
+ $cleanup_avail = cleanup_possible() if $cleanup_avail < 0;
+ return if $cleanup_avail != 1;
$cleanup_timer ||= PublicInbox::EvCleanup::later(*cleanup_task);
$CLEANUP->{"$self"} = $self;
}
};
}
- sub search {
- my ($self) = @_;
- $self->{search} ||= eval {
+ sub search ($;$) {
+ my ($self, $over_only) = @_;
+ my $srch = $self->{search} ||= eval {
_cleanup_later($self);
+ require PublicInbox::Search;
PublicInbox::Search->new($self, $self->{altid});
};
+ ($over_only || eval { $srch->xdb }) ? $srch : undef;
+ }
+
+ sub over ($) {
+ my ($self) = @_;
+ my $srch = search($self, 1) or return;
+ $self->{over} ||= eval {
+ my $over = $srch->{over_ro};
+ $over->dbh_new; # may fail
+ $over;
+ }
}
sub try_cat {
sub nntp_usable {
my ($self) = @_;
- my $ret = $self->mm && $self->search;
- $self->{mm} = $self->{search} = undef;
+ my $ret = mm($self) && over($self);
+ $self->{mm} = $self->{over} = $self->{search} = undef;
$ret;
}
sub smsg_by_mid ($$) {
my ($self, $mid) = @_;
- my $srch = search($self) or return;
+ my $over = over($self) or return;
# favor the Message-ID we used for the NNTP article number:
defined(my $num = mid2num($self, $mid)) or return;
- my $smsg = $srch->lookup_article($num) or return;
+ my $smsg = $over->get_art($num) or return;
PublicInbox::SearchMsg::psgi_cull($smsg);
}
sub msg_by_mid ($$;$) {
my ($self, $mid, $ref) = @_;
- my $srch = search($self) or
+
+ over($self) or
return msg_by_path($self, mid2path($mid), $ref);
+
my $smsg = smsg_by_mid($self, $mid);
$smsg ? msg_by_smsg($self, $smsg, $ref) : undef;
}
sub recent {
my ($self, $opts, $after, $before) = @_;
- search($self)->{over_ro}->recent($opts, $after, $before);
+ over($self)->recent($opts, $after, $before);
}
sub modified {
my ($self) = @_;
- if (my $srch = search($self)) {
- my $msgs = $srch->{over_ro}->recent({limit => 1});
+ if (my $over = over($self)) {
+ my $msgs = $over->recent({limit => 1});
if (my $smsg = $msgs->[0]) {
return $smsg->{ts};
}
use constant YYYYMMDD => 1; # Date: header for searching in the WWW UI
use constant DT => 2; # Date: YYYYMMDDHHMMSS
- use Search::Xapian qw/:standard/;
use PublicInbox::SearchMsg;
use PublicInbox::MIME;
use PublicInbox::MID qw/id_compress/;
use PublicInbox::Over;
+ my $QP_FLAGS;
+ sub load_xapian () {
+ $QP_FLAGS ||= eval {
+ require Search::Xapian;
+ Search::Xapian->import(qw(:standard));
+
+ # n.b. FLAG_PURE_NOT is expensive and not suitable for a
+ # public website, as it could become a denial-of-service vector
++ # FLAG_PHRASE also seems to cause performance problems
++ # sometimes.
++ # TODO: make this an option, maybe?
++ # or make indexlevel=medium as default
+ FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD();
+ };
+ };
# This is English-only, everything else is non-standard and may be confused as
# a prefix common in patch emails
- our $REPLY_RE = qr/^re:\s+/i;
our $LANG = 'english';
use constant {
# (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0)
# 14 - fix ghost root vivification
SCHEMA_VERSION => 15,
-
- # n.b. FLAG_PURE_NOT is expensive not suitable for a public website
- # as it could become a denial-of-service vector
- #
- # FLAG_PHRASE also seems to cause performance problems sometimes.
- # TODO: make this an option, maybe?
- # or make indexlevel=medium as default
- QP_FLAGS => FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
my %bool_pfx_external = (
);
chomp @HELP;
- sub xdir {
- my ($self) = @_;
+ sub xdir ($;$) {
+ my ($self, $rdonly) = @_;
if ($self->{version} == 1) {
"$self->{mainrepo}/public-inbox/xapian" . SCHEMA_VERSION;
} else {
my $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION;
+ return $dir if $rdonly;
+
my $part = $self->{partition};
defined $part or die "partition not given";
$dir .= "/$part";
}
}
+ sub xdb ($) {
+ my ($self) = @_;
+ $self->{xdb} ||= do {
+ load_xapian();
+ my $dir = xdir($self, 1);
+ if ($self->{version} >= 2) {
+ my $xdb;
+ foreach my $part (<$dir/*>) {
+ -d $part && $part =~ m!/\d+\z! or next;
+ my $sub = Search::Xapian::Database->new($part);
+ if ($xdb) {
+ $xdb->add_database($sub);
+ } else {
+ $xdb = $sub;
+ }
+ }
+ $xdb;
+ } else {
+ Search::Xapian::Database->new($dir);
+ }
+ };
+ }
+
sub new {
my ($class, $mainrepo, $altid) = @_;
my $version = 1;
altid => $altid,
version => $version,
}, $class;
- my $dir;
- if ($version >= 2) {
- $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION;
- my $xdb;
- my $parts = 0;
- foreach my $part (<$dir/*>) {
- -d $part && $part =~ m!/\d+\z! or next;
- $parts++;
- my $sub = Search::Xapian::Database->new($part);
- if ($xdb) {
- $xdb->add_database($sub);
- } else {
- $xdb = $sub;
- }
- }
- $self->{xdb} = $xdb;
- } else {
- $dir = $self->xdir;
- $self->{xdb} = Search::Xapian::Database->new($dir);
- }
+ my $dir = xdir($self, 1);
$self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3");
$self;
}
sub reopen {
my ($self) = @_;
- $self->{xdb}->reopen;
+ if (my $xdb = $self->{xdb}) {
+ $xdb->reopen;
+ }
$self; # make chaining easier
}
if ($query_string eq '' && !$opts->{mset}) {
$self->{over_ro}->recent($opts);
} else {
- my $query = $self->qp->parse_query($query_string, QP_FLAGS);
+ my $qp = qp($self);
+ my $query = $qp->parse_query($query_string, $QP_FLAGS);
$opts->{relevance} = 1 unless exists $opts->{relevance};
_do_enquire($self, $query, $opts);
}
}
- sub get_thread {
- my ($self, $mid, $prev) = @_;
- $self->{over_ro}->get_thread($mid, $prev);
- }
-
sub retry_reopen {
my ($self, $cb) = @_;
for my $i (1..10) {
sub _enquire_once {
my ($self, $query, $opts) = @_;
- my $enquire = Search::Xapian::Enquire->new($self->{xdb});
+ my $xdb = xdb($self);
+ my $enquire = Search::Xapian::Enquire->new($xdb);
$enquire->set_query($query);
$opts ||= {};
my $desc = !$opts->{asc};
my $qp = $self->{query_parser};
return $qp if $qp;
-
+ my $xdb = xdb($self);
# new parser
$qp = Search::Xapian::QueryParser->new;
- $qp->set_default_op(OP_AND);
- $qp->set_database($self->{xdb});
+ $qp->set_default_op(OP_AND());
+ $qp->set_database($xdb);
$qp->set_stemmer($self->stemmer);
- $qp->set_stemming_strategy(STEM_SOME);
+ $qp->set_stemming_strategy(STEM_SOME());
$qp->set_max_wildcard_expansion(100);
$qp->add_valuerangeprocessor(
Search::Xapian::NumberValueRangeProcessor->new(YYYYMMDD, 'd:'));
$self->{query_parser} = $qp;
}
- # only used for NNTP server
- sub query_xover {
- my ($self, $beg, $end, $offset) = @_;
- $self->{over_ro}->query_xover($beg, $end, $offset);
- }
-
- sub query_ts {
- my ($self, $ts, $prev) = @_;
- $self->{over_ro}->query_ts($ts, $prev);
- }
-
sub lookup_article {
my ($self, $num) = @_;
$self->{over_ro}->get_art($num);
}
- sub next_by_mid {
- my ($self, $mid, $id, $prev) = @_;
- $self->{over_ro}->next_by_mid($mid, $id, $prev);
- }
-
- # normalize subjects so they are suitable as pathnames for URLs
- # XXX: consider for removal
- sub subject_path {
- my $subj = pop;
- $subj = subject_normalized($subj);
- $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
- lc($subj);
- }
-
- sub subject_normalized {
- my $subj = pop;
- $subj =~ s/\A\s+//s; # no leading space
- $subj =~ s/\s+\z//s; # no trailing space
- $subj =~ s/\s+/ /gs; # no redundant spaces
- $subj =~ s/\.+\z//; # no trailing '.'
- $subj =~ s/$REPLY_RE//igo; # remove reply prefix
- $subj;
- }
-
sub help {
my ($self) = @_;
$self->qp; # parse altids
use PublicInbox::OverIdx;
use PublicInbox::Spawn qw(spawn);
use PublicInbox::Git qw(git_unquote);
-use Compress::Zlib qw(compress);
use constant {
BATCH_BYTES => defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
die("Invalid indexlevel $ibx->{indexlevel}\n");
}
}
- } else { # v1
+ } else { # v1 (FIXME: old tests)
$ibx = { mainrepo => $git_dir, version => 1 };
}
$ibx = PublicInbox::InboxWritable->new($ibx);
- require Search::Xapian::WritableDatabase;
my $self = bless {
mainrepo => $mainrepo,
-inbox => $ibx,
$self;
}
+ sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels }
+
sub _xdb_release {
my ($self) = @_;
- my $xdb = delete $self->{xdb} or croak 'not acquired';
- $xdb->close;
+ if (need_xapian($self)) {
+ my $xdb = delete $self->{xdb} or croak 'not acquired';
+ $xdb->close;
+ }
$self->lock_release if $self->{creat};
undef;
}
sub _xdb_acquire {
my ($self) = @_;
- croak 'already acquired' if $self->{xdb};
+ my $flag;
my $dir = $self->xdir;
- my $flag = Search::Xapian::DB_OPEN;
+ if (need_xapian($self)) {
+ croak 'already acquired' if $self->{xdb};
+ PublicInbox::Search::load_xapian();
+ require Search::Xapian::WritableDatabase;
+ $flag = $self->{creat} ?
+ Search::Xapian::DB_CREATE_OR_OPEN() :
+ Search::Xapian::DB_OPEN();
+ }
if ($self->{creat}) {
require File::Path;
$self->lock_acquire;
- File::Path::mkpath($dir);
- $flag = Search::Xapian::DB_CREATE_OR_OPEN;
+
+ # don't create empty Xapian directories if we don't need Xapian
+ my $is_part = defined($self->{partition});
+ if (!$is_part || ($is_part && need_xapian($self))) {
+ File::Path::mkpath($dir);
+ }
}
+ return unless defined $flag;
$self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
}
$num = index_mm($self, $mime);
}
eval {
- if ($self->{indexlevel} =~ $xapianlevels) {
+ if (need_xapian($self)) {
$self->add_xapian($mime, $num, $oid, $mids, $mid0)
}
if (my $over = $self->{over}) {
# v1 only, where $mid is unique
sub remove_message {
my ($self, $mid) = @_;
- my $db = $self->{xdb};
$mid = mid_clean($mid);
if (my $over = $self->{over}) {
warn "<$mid> missing for removal from overview\n";
}
}
- return if $self->{indexlevel} !~ $xapianlevels;
+ return unless need_xapian($self);
+ my $db = $self->{xdb};
my $nr = 0;
eval {
batch_do($self, 'Q' . $mid, sub {
# MID is a hint in V2
sub remove_by_oid {
my ($self, $oid, $mid) = @_;
- my $db = $self->{xdb};
$self->{over}->remove_oid($oid, $mid) if $self->{over};
+ return unless need_xapian($self);
+ my $db = $self->{xdb};
+
# XXX careful, we cannot use batch_do here since we conditionally
# delete documents based on other factors, so we cannot call
# find_doc_ids twice.
my ($self, $mm) = @_;
my $lm = $mm->last_commit || '';
my $lx = '';
- if ($self->{indexlevel} =~ $xapianlevels) {
+ if (need_xapian($self)) {
$lx = $self->{xdb}->get_metadata('last_commit') || '';
} else {
$lx = $lm;
$self->{over}->disconnect;
$git->cleanup;
delete $self->{txn};
- $xdb->cancel_transaction;
+ $xdb->cancel_transaction if $xdb;
$xdb = _xdb_release($self);
# ensure we leak no FDs to "git log" with Xapian <= 1.2
}
$dbh->commit;
}
- if ($newest && $self->{indexlevel} =~ $xapianlevels) {
+ if ($newest && need_xapian($self)) {
my $cur = $xdb->get_metadata('last_commit');
if (need_update($self, $cur, $newest)) {
$xdb->set_metadata('last_commit', $newest);
$self->{-inbox}->with_umask(sub {
my $xdb = $self->{xdb} || $self->_xdb_acquire;
$self->{over}->begin_lazy if $self->{over};
- $xdb->begin_transaction;
+ $xdb->begin_transaction if $xdb;
$self->{txn} = 1;
$xdb;
});
my ($self) = @_;
delete $self->{txn} or return;
$self->{-inbox}->with_umask(sub {
- $self->{xdb}->commit_transaction;
+ if (my $xdb = $self->{xdb}) {
+ $xdb->commit_transaction;
+ }
$self->{over}->commit_lazy if $self->{over};
});
}
sub worker_done {
my ($self) = @_;
- die "$$ $0 xdb not released\n" if $self->{xdb};
+ if (need_xapian($self)) {
+ die "$$ $0 xdb not released\n" if $self->{xdb};
+ }
die "$$ $0 still in transaction\n" if $self->{txn};
}
my $ctx = { env => $env, www => $self };
# we don't care about multi-value
- my %qp = map {
+ %{$ctx->{qp}} = map {
utf8::decode($_);
- my ($k, $v) = split('=', uri_unescape($_), 2);
- $v = '' unless defined $v;
- $v =~ tr/+/ /;
- ($k, $v)
+ tr/+/ /;
+ my ($k, $v) = split('=', $_, 2);
+ $v = uri_unescape($v // '');
+ # none of the keys we care about will need escaping
+ $k => $v;
} split(/[&;]+/, $env->{QUERY_STRING});
- $ctx->{qp} = \%qp;
# avoiding $env->{PATH_INFO} here since that's already decoded
my ($path_info) = ($env->{REQUEST_URI} =~ path_re($env));
require PublicInbox::MIME;
require Digest::SHA;
require POSIX;
-
- foreach (qw(PublicInbox::Search PublicInbox::SearchView
+ eval {
+ require PublicInbox::Search;
+ PublicInbox::Search::load_xapian();
+ };
+ foreach (qw(PublicInbox::SearchView
PublicInbox::Mbox IO::Compress::Gzip
PublicInbox::NewsWWW)) {
eval "require $_;";
my ($ctx) = @_;
if ($ctx && $ctx->{mid}) {
require PublicInbox::ExtMsg;
- searcher($ctx);
return PublicInbox::ExtMsg::ext_msg($ctx);
}
r(404, 'Not Found');
sub get_index {
my ($ctx) = @_;
require PublicInbox::Feed;
- searcher($ctx);
if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
require PublicInbox::SearchView;
PublicInbox::SearchView::sres_top_html($ctx);
sub get_mid_html {
my ($ctx) = @_;
require PublicInbox::View;
- searcher($ctx);
PublicInbox::View::msg_page($ctx) || r404($ctx);
}
# /$INBOX/$MESSAGE_ID/t/
sub get_thread {
my ($ctx, $flat) = @_;
- searcher($ctx) or return need_search($ctx);
+ $ctx->{-inbox}->over or return need($ctx, 'Overview');
$ctx->{flat} = $flat;
require PublicInbox::View;
PublicInbox::View::thread_html($ctx);
$val;
}
- # search support is optional, returns undef if Xapian is not installed
- # or not configured for the given GIT_DIR
- sub searcher {
- my ($ctx) = @_;
- eval {
- require PublicInbox::Search;
- $ctx->{srch} = $ctx->{-inbox}->search;
- };
- }
-
- sub need_search {
- my ($ctx) = @_;
+ sub need {
+ my ($ctx, $extra) = @_;
my $msg = <<EOF;
- <html><head><title>Search not available for this
- public-inbox</title><body><pre>Search is not available for this public-inbox
+ <html><head><title>$extra not available for this
+ public-inbox</title><body><pre>$extra is not available for this public-inbox
<a href="../">Return to index</a></pre></body></html>
EOF
[ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ];
# especially on older systems. Stick to zlib since that's what git uses.
sub get_thread_mbox {
my ($ctx, $sfx) = @_;
- my $srch = searcher($ctx) or return need_search($ctx);
+ my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview');
require PublicInbox::Mbox;
- PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx);
+ PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx);
}
# /$INBOX/$MESSAGE_ID/t.atom -> thread as Atom feed
sub get_thread_atom {
my ($ctx) = @_;
- searcher($ctx) or return need_search($ctx);
+ $ctx->{-inbox}->over or return need($ctx, 'Overview');
require PublicInbox::Feed;
PublicInbox::Feed::generate_thread_atom($ctx);
}
sub mbox_results {
my ($ctx) = @_;
if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
- searcher($ctx) or return need_search($ctx);
+ $ctx->{-inbox}->search or return need($ctx, 'search');
require PublicInbox::SearchView;
return PublicInbox::SearchView::mbox_results($ctx);
}
my ($ctx, $inbox, $range) = @_;
invalid_inbox($ctx, $inbox) || eval {
require PublicInbox::Mbox;
- searcher($ctx);
PublicInbox::Mbox::emit_range($ctx, $range);
}
}