- Search::Xapian deb: libsearch-xapian-perl
pkg: p5-Search-Xapian
rpm: perl-Search-Xapian
- (for v2, HTTP search)
+ (HTTP search)
- Net::Server deb: libnet-server-perl
pkg: p5-Net-Server
* large mbox/Maildir/MH/NNTP spool import (see PublicInbox::Import)
-* Allow NNTP and more of PSGI code to work without Xapian
-
* Read-only WebDAV interface to the git repo so it can be mounted
via davfs2 or fusedav to avoid full clones.
davfs2 needs Range: request support for this to be feasible:
# bare minimum for v2
-$profiles->{v2essential} = [ @{$profiles->{essential}}, qw(
- DBD::SQLite
- DBI
- Search::Xapian
- xapian-compact
-) ];
+$profiles->{v2essential} = [ @{$profiles->{essential}}, qw(DBD::SQLite DBI) ];
# package names which can't be mapped automatically:
my $non_auto = {
EOF
;;
debian-sid|debian-9|debian-10) sed "s/^/$PKG_FMT /" <<EOF
-all devtest-
all devtest
+all devtest Search::Xapian-
+all devtest-
v2essential
essential
essential devtest-
}
}
+# TODO: make Devel::Peek optional, only used for daemon
+my @base_mod = qw(Email::MIME Date::Parse Devel::Peek); # included in every group below
+my @over_mod = qw(DBD::SQLite DBI); # added on top of @base_mod by -index and -search
+my %mod_groups = ( # group alias => module list; aliases are expanded by check_require
+ -index => [ @base_mod, @over_mod ],
+ -base => \@base_mod,
+ -search => [ @base_mod, @over_mod, 'Search::Xapian' ],
+);
+
+# Record in %$mods the modules needed to index $ibx: everything in
+# @over_mod when indexlevel is exactly `basic', Search::Xapian for any
+# other (or unset) indexlevel.
+sub scan_ibx_modules ($$) {
+	my ($mods, $ibx) = @_;
+	my $level = $ibx->{indexlevel};
+	if ($level && $level eq 'basic') {
+		$mods->{$_} = 1 for @over_mod;
+	} else {
+		$mods->{'Search::Xapian'} = 1;
+	}
+}
+
+# Try to load every module named in @_.  A name found in %mod_groups
+# (e.g. `-index') is replaced by the modules of that group.
+# Returns undef when everything loaded, otherwise a hashref mapping
+# each module which failed to load to its $@ message.
+sub check_require {
+	my @todo = @_;
+	my %failed;
+	while (my $name = shift @todo) {
+		my $members = $mod_groups{$name};
+		if ($members) {
+			# group alias: queue its members instead
+			push @todo, @$members;
+			next;
+		}
+		eval "require $name";
+		$failed{$name} = $@ if $@;
+	}
+	%failed ? \%failed : undef;
+}
+
+# Format the module names found in the keys of %$err as a sorted,
+# human-readable list with every name quoted `like this', e.g.:
+#   `DBD::SQLite', `DBI' and `Search::Xapian'
+# $err is a hashref as returned by check_require.
+# Returns the formatted string (undef if %$err is empty).
+sub missing_mod_msg {
+	my ($err) = @_;
+	my @mods = map { "`$_'" } sort keys %$err;
+	my $last = pop @mods;
+	# every name already carries its own closing quote, so the
+	# text joining the last name must not add another one
+	@mods ? (join(', ', @mods)." and $last") : $last
+}
+
+# Die with a message naming every missing module unless all of the
+# modules (or module groups) in @_ can be loaded.
+sub require_or_die {
+	my $missing = check_require(@_);
+	return unless $missing;
+	die missing_mod_msg($missing)." required for $0\n";
+}
+
+# Die if $indexlevel is not one of `basic', `medium' or `full', or if
+# the modules required for that level cannot be loaded.
+sub indexlevel_ok_or_die ($) {
+	my ($indexlevel) = @_;
+	# indexlevel => module group understood by check_require
+	my %group_for = (
+		basic => '-index',
+		medium => '-search',
+		full => '-search',
+	);
+	my $req = $group_for{$indexlevel};
+	defined $req or die <<"";
+invalid indexlevel=$indexlevel (must be `basic', `medium', or `full')
+
+	my $err = check_require($req) or return;
+	die missing_mod_msg($err) ." required for indexlevel=$indexlevel\n";
+}
+
1;
sub generate_thread_atom {
my ($ctx) = @_;
my $mid = $ctx->{mid};
- my $msgs = $ctx->{srch}->get_thread($mid);
+ my $ibx = $ctx->{-inbox};
+ my $msgs = $ibx->over->get_thread($mid);
return _no_thread() unless @$msgs;
- my $ibx = $ctx->{-inbox};
my $html_url = $ibx->base_url($ctx->{env});
$html_url .= PublicInbox::Hval->new_msgid($mid)->{href};
$ctx->{-html_url} = $html_url;
# if the 'r' query parameter is given, it is a legacy permalink
# which we must continue supporting:
my $qp = $ctx->{qp};
- if ($qp && !$qp->{r} && $ctx->{srch}) {
+ my $ibx = $ctx->{-inbox};
+ if ($qp && !$qp->{r} && $ibx->over) {
return PublicInbox::View::index_topics($ctx);
}
my $env = $ctx->{env};
- my $url = $ctx->{-inbox}->base_url($env) . 'new.html';
+ my $url = $ibx->base_url($env) . 'new.html';
my $qs = $env->{QUERY_STRING};
$url .= "?$qs" if $qs ne '';
[302, [ 'Location', $url, 'Content-Type', 'text/plain'],
if ($v > 2) {
die "BUG: unsupported inbox version: $v\n";
}
- if (my $srch = $ibx->search) {
+ if ($ibx->over) {
return PublicInbox::View::paginate_recent($ctx, $max);
}
for my $ibx (values %$CLEANUP) {
my $again;
if ($have_devel_peek) {
- foreach my $f (qw(mm search)) {
+ foreach my $f (qw(mm search over)) {
# we bump refcnt by assigning tmp, here:
my $tmp = $ibx->{$f} or next;
next if Devel::Peek::SvREFCNT($tmp) > 2;
}
}
if ($have_devel_peek) {
- $again ||= !!($ibx->{mm} || $ibx->{search});
+ $again ||= !!($ibx->{over} || $ibx->{mm} ||
+ $ibx->{search});
}
$next->{"$ibx"} = $ibx if $again;
}
};
}
-sub search {
- my ($self) = @_;
- $self->{search} ||= eval {
+sub search ($;$) { # returns the cached PublicInbox::Search object, or undef
+ my ($self, $over_only) = @_;
+ my $srch = $self->{search} ||= eval {
_cleanup_later($self);
+ require PublicInbox::Search; # loaded lazily, inside the eval
PublicInbox::Search->new($self, $self->{altid});
};
+ ($over_only || eval { $srch->xdb }) ? $srch : undef; # unless $over_only, xdb must open too
+}
+
+sub over ($) { # returns the cached over_ro object of our search object, if usable
+ my ($self) = @_;
+ my $srch = search($self, 1) or return; # 1: do not require xdb
+ $self->{over} ||= eval {
+ my $over = $srch->{over_ro};
+ $over->dbh_new; # may fail
+ $over;
+ }
}
sub try_cat {
sub nntp_usable {
my ($self) = @_;
- my $ret = $self->mm && $self->search;
- $self->{mm} = $self->{search} = undef;
+ my $ret = mm($self) && over($self); # usable only if both mm() and over() succeed
+ $self->{mm} = $self->{over} = $self->{search} = undef; # clear all three slots after probing
$ret;
}
sub smsg_by_mid ($$) {
my ($self, $mid) = @_;
- my $srch = search($self) or return;
+ my $over = over($self) or return; # nothing to look up without over()
# favor the Message-ID we used for the NNTP article number:
defined(my $num = mid2num($self, $mid)) or return;
- my $smsg = $srch->lookup_article($num) or return;
+ my $smsg = $over->get_art($num) or return; # look up by article number
PublicInbox::SearchMsg::psgi_cull($smsg);
}
sub msg_by_mid ($$;$) {
my ($self, $mid, $ref) = @_;
- my $srch = search($self) or
+
+ over($self) or # when over() is unavailable, fall back to a lookup by path
return msg_by_path($self, mid2path($mid), $ref);
+
my $smsg = smsg_by_mid($self, $mid);
$smsg ? msg_by_smsg($self, $smsg, $ref) : undef;
}
sub recent {
my ($self, $opts, $after, $before) = @_;
- search($self)->{over_ro}->recent($opts, $after, $before);
+ over($self)->recent($opts, $after, $before); # NOTE(review): dies if over() returns undef; callers presumably check over() first - confirm
}
sub modified {
my ($self) = @_;
- if (my $srch = search($self)) {
- my $msgs = $srch->{over_ro}->recent({limit => 1});
+ if (my $over = over($self)) {
+ my $msgs = $over->recent({limit => 1});
if (my $smsg = $msgs->[0]) {
return $smsg->{ts};
}
}
$cur = $next or return;
my $ibx = $ctx->{-inbox};
- $next = $ibx->search->next_by_mid($ctx->{mid}, \$id, \$prev);
+ $next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
@$more = ($ctx, $id, $prev, $next); # $next may be undef, here
my $mref = $ibx->msg_by_smsg($cur) or return;
msg_str($ctx, Email::Simple->new($mref));
my $ibx = $ctx->{-inbox};
my $first;
my $more;
- if (my $srch = $ibx->search) {
+ if (my $over = $ibx->over) {
my ($id, $prev);
- my $smsg = $srch->next_by_mid($mid, \$id, \$prev) or return;
+ my $smsg = $over->next_by_mid($mid, \$id, \$prev) or return;
my $mref = $ibx->msg_by_smsg($smsg) or return;
$first = Email::Simple->new($mref);
- my $next = $srch->next_by_mid($mid, \$id, \$prev);
+ my $next = $over->next_by_mid($mid, \$id, \$prev);
# $more is for ->getline
$more = [ $ctx, $id, $prev, $next, $first ] if $next;
} else {
}
sub thread_mbox {
- my ($ctx, $srch, $sfx) = @_;
+ my ($ctx, $over, $sfx) = @_;
eval { require IO::Compress::Gzip };
return sub { need_gzip(@_) } if $@;
my $mid = $ctx->{mid};
- my $msgs = $srch->get_thread($mid, {});
+ my $msgs = $over->get_thread($mid, {});
return [404, [qw(Content-Type text/plain)], []] if !@$msgs;
my $prev = $msgs->[-1];
my $i = 0;
return $smsg;
}
# refill result set
- $msgs = $srch->get_thread($mid, $prev);
+ $msgs = $over->get_thread($mid, $prev);
return unless @$msgs;
$prev = $msgs->[-1];
$i = 0;
sub mbox_all_ids {
my ($ctx) = @_;
my $prev = 0;
- my $ids = $ctx->{-inbox}->mm->ids_after(\$prev) or return
+ my $ibx = $ctx->{-inbox};
+ my $ids = $ibx->mm->ids_after(\$prev) or return
[404, [qw(Content-Type text/plain)], ["No results found\n"]];
my $i = 0;
- my $over = $ctx->{srch}->{over_ro};
+ my $over = $ibx->over or
+ return PublicInbox::WWW::need($ctx, 'Overview');
my $cb = sub {
do {
while ((my $num = $ids->[$i++])) {
my $smsg = $over->get_art($num) or next;
return $smsg;
}
- $ids = $ctx->{-inbox}->mm->ids_after(\$prev);
+ $ids = $ibx->mm->ids_after(\$prev);
$i = 0;
} while (@$ids);
undef;
return sub { need_gzip(@_) } if $@;
return mbox_all_ids($ctx) if $query eq '';
my $opts = { mset => 2 };
- my $srch = $ctx->{srch};
+ my $srch = $ctx->{-inbox}->search or
+ return PublicInbox::WWW::need($ctx, 'Search');;
my $mset = $srch->query($query, $opts);
$opts->{offset} = $mset->size or
return [404, [qw(Content-Type text/plain)],
my ($keep, $skip) = split('!', $newsgroups, 2);
ngpat2re($keep);
ngpat2re($skip);
- my @srch;
+ my @over;
foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
$ng->{newsgroup} =~ $keep or next;
$ng->{newsgroup} =~ $skip and next;
- my $srch = $ng->search or next;
- push @srch, $srch;
+ my $over = $ng->over or next;
+ push @over, $over;
};
- return '.' unless @srch;
+ return '.' unless @over;
my $prev = 0;
long_response($self, sub {
- my $srch = $srch[0];
- my $msgs = $srch->query_ts($ts, $prev);
+ my $over = $over[0];
+ my $msgs = $over->query_ts($ts, $prev);
if (scalar @$msgs) {
more($self, '<' .
join(">\r\n<", map { $_->mid } @$msgs ).
'>');
$prev = $msgs->[-1]->{num};
} else {
- shift @srch;
- if (@srch) { # continue onto next newsgroup
+ shift @over;
+ if (@over) { # continue onto next newsgroup
$prev = 0;
return 1;
} else { # break out of the long response.
defined $mid or return $err;
}
found:
- my $smsg = $ng->search->{over_ro}->get_art($n) or return $err;
+ my $smsg = $ng->over->get_art($n) or return $err;
my $msg = $ng->msg_by_smsg($smsg) or return $err;
my $s = Email::Simple->new($msg);
if ($set_headers) {
}
}
-sub search_header_for {
- my ($srch, $num, $field) = @_;
- my $smsg = $srch->{over_ro}->get_art($num) or return;
+sub over_header_for { # value of $field for article $num
+ my ($over, $num, $field) = @_;
+ my $smsg = $over->get_art($num) or return; # no such article
return PublicInbox::SearchMsg::date($smsg) if $field eq 'date';
$smsg->{$field};
}
if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID
my ($ng, $n) = mid_lookup($self, $1);
return r430 unless defined $n;
- my $v = search_header_for($ng->search, $n, $field);
+ my $v = over_header_for($ng->over, $n, $field);
hdr_mid_response($self, $xhdr, $ng, $n, $range, $v);
} else { # numeric range
$range = $self->{article} unless defined $range;
- my $srch = $self->{ng}->search;
+ my $over = $self->{ng}->over;
my $mm = $self->{ng}->mm;
my $r = get_range($self, $range);
return $r unless ref $r;
more($self, $xhdr ? r221 : r225);
my $cur = $beg;
long_response($self, sub {
- my $msgs = $srch->query_xover($cur, $end);
+ my $msgs = $over->query_xover($cur, $end);
my $nr = scalar @$msgs or return;
my $tmp = '';
foreach my $s (@$msgs) {
return $r unless ref $r;
my ($beg, $end) = @$r;
my $mm = $ng->mm;
- my $srch = $ng->search;
+ my $over = $ng->over;
more($self, '224 Overview information follows');
long_response($self, sub {
- my $h = search_header_for($srch, $beg, 'references');
+ my $h = over_header_for($over, $beg, 'references');
more($self, "$beg $h") if defined($h);
$beg++ < $end;
});
if ($range && $range =~ /\A<(.+)>\z/) {
my ($ng, $n) = mid_lookup($self, $1);
defined $n or return r430;
- my $smsg = $ng->search->{over_ro}->get_art($n) or return r430;
+ my $smsg = $ng->over->get_art($n) or return r430;
more($self, '224 Overview information follows (multi-line)');
# Only set article number column if it's the current group
return $r unless ref $r;
my ($beg, $end) = @$r;
more($self, "224 Overview information follows for $beg to $end");
- my $srch = $self->{ng}->search;
+ my $over = $self->{ng}->over;
my $cur = $beg;
long_response($self, sub {
- my $msgs = $srch->query_xover($cur, $end);
+ my $msgs = $over->query_xover($cur, $end);
my $nr = scalar @$msgs or return;
# OVERVIEW.FMT
use IO::Handle;
use DBI qw(:sql_types); # SQL_BLOB
use PublicInbox::MID qw/id_compress mids references/;
-use PublicInbox::SearchMsg;
+use PublicInbox::SearchMsg qw(subject_normalized);
use Compress::Zlib qw(compress);
use PublicInbox::Search;
\@keep;
}
+# Turn a subject into a lowercase string usable as a URL path
+# component: the subject is normalized, then every run of characters
+# outside [a-zA-Z0-9_.~/-] becomes a single `_'.
+# XXX: consider for removal
+sub subject_path ($) {
+	my $path = subject_normalized($_[0]);
+	$path =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
+	lc($path);
+}
+
sub add_overview {
my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
my $lines = $mime->body_raw =~ tr!\n!\n!;
my $subj = $smsg->subject;
my $xpath;
if ($subj ne '') {
- $xpath = PublicInbox::Search::subject_path($subj);
+ $xpath = subject_path($subj);
$xpath = id_compress($xpath);
}
my $dd = $smsg->to_doc_data($oid, $mid0);
use constant YYYYMMDD => 1; # Date: header for searching in the WWW UI
use constant DT => 2; # Date: YYYYMMDDHHMMSS
-use Search::Xapian qw/:standard/;
use PublicInbox::SearchMsg;
use PublicInbox::MIME;
use PublicInbox::MID qw/id_compress/;
use PublicInbox::Over;
+my $QP_FLAGS;
+
+# Load Search::Xapian on first use, import its :standard symbols and
+# cache the query parser flags in $QP_FLAGS.
+# Returns $QP_FLAGS; it stays false if the eval below fails.
+sub load_xapian () {
+	return $QP_FLAGS if $QP_FLAGS;
+	$QP_FLAGS = eval {
+		require Search::Xapian;
+		Search::Xapian->import(qw(:standard));
+
+		# n.b. FLAG_PURE_NOT is expensive and not suitable for a
+		# public website as it could become a denial-of-service
+		# vector.  FLAG_PHRASE also seems to cause performance
+		# problems sometimes.
+		# TODO: make this an option, maybe?
+		# or make indexlevel=medium as default
+		FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD();
+	};
+}
# This is English-only, everything else is non-standard and may be confused as
# a prefix common in patch emails
-our $REPLY_RE = qr/^re:\s+/i;
our $LANG = 'english';
use constant {
# (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0)
# 14 - fix ghost root vivification
SCHEMA_VERSION => 15,
-
- # n.b. FLAG_PURE_NOT is expensive not suitable for a public website
- # as it could become a denial-of-service vector
- #
- # FLAG_PHRASE also seems to cause performance problems sometimes.
- # TODO: make this an option, maybe?
- # or make indexlevel=medium as default
- QP_FLAGS => FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
my %bool_pfx_external = (
);
chomp @HELP;
-sub xdir {
- my ($self) = @_;
+sub xdir ($;$) { # path of the Xapian directory for this inbox
+ my ($self, $rdonly) = @_;
if ($self->{version} == 1) {
"$self->{mainrepo}/public-inbox/xapian" . SCHEMA_VERSION;
} else {
my $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION;
+ return $dir if $rdonly; # with $rdonly: the directory above the partitions
+
my $part = $self->{partition};
defined $part or die "partition not given";
$dir .= "/$part";
}
}
+# Return the read-only Xapian database handle, opening and caching it
+# in $self->{xdb} on first use.  For version >= 2, every numbered
+# subdirectory of xdir($self, 1) is opened and combined into a single
+# handle via add_database.
+sub xdb ($) {
+	my ($self) = @_;
+	return $self->{xdb} if $self->{xdb};
+
+	load_xapian();
+	my $dir = xdir($self, 1);
+	my $xdb;
+	if ($self->{version} >= 2) {
+		for my $part (glob("$dir/*")) {
+			# only numbered directories are partitions
+			next unless -d $part && $part =~ m!/\d+\z!;
+			my $sub = Search::Xapian::Database->new($part);
+			if ($xdb) {
+				$xdb->add_database($sub);
+			} else {
+				$xdb = $sub;
+			}
+		}
+	} else {
+		$xdb = Search::Xapian::Database->new($dir);
+	}
+	$self->{xdb} = $xdb;
+}
+
sub new {
my ($class, $mainrepo, $altid) = @_;
my $version = 1;
altid => $altid,
version => $version,
}, $class;
- my $dir;
- if ($version >= 2) {
- $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION;
- my $xdb;
- my $parts = 0;
- foreach my $part (<$dir/*>) {
- -d $part && $part =~ m!/\d+\z! or next;
- $parts++;
- my $sub = Search::Xapian::Database->new($part);
- if ($xdb) {
- $xdb->add_database($sub);
- } else {
- $xdb = $sub;
- }
- }
- $self->{xdb} = $xdb;
- } else {
- $dir = $self->xdir;
- $self->{xdb} = Search::Xapian::Database->new($dir);
- }
+ my $dir = xdir($self, 1);
$self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3");
$self;
}
sub reopen {
my ($self) = @_;
- $self->{xdb}->reopen;
+ if (my $xdb = $self->{xdb}) { # xdb may be unset: xdb() only opens it on demand
+ $xdb->reopen;
+ }
$self; # make chaining easier
}
if ($query_string eq '' && !$opts->{mset}) {
$self->{over_ro}->recent($opts);
} else {
- my $query = $self->qp->parse_query($query_string, QP_FLAGS);
+ my $qp = qp($self);
+ my $query = $qp->parse_query($query_string, $QP_FLAGS);
$opts->{relevance} = 1 unless exists $opts->{relevance};
_do_enquire($self, $query, $opts);
}
}
-sub get_thread {
- my ($self, $mid, $prev) = @_;
- $self->{over_ro}->get_thread($mid, $prev);
-}
-
sub retry_reopen {
my ($self, $cb) = @_;
for my $i (1..10) {
sub _enquire_once {
my ($self, $query, $opts) = @_;
- my $enquire = Search::Xapian::Enquire->new($self->{xdb});
+ my $xdb = xdb($self);
+ my $enquire = Search::Xapian::Enquire->new($xdb);
$enquire->set_query($query);
$opts ||= {};
my $desc = !$opts->{asc};
my $qp = $self->{query_parser};
return $qp if $qp;
-
+ my $xdb = xdb($self);
# new parser
$qp = Search::Xapian::QueryParser->new;
- $qp->set_default_op(OP_AND);
- $qp->set_database($self->{xdb});
+ $qp->set_default_op(OP_AND());
+ $qp->set_database($xdb);
$qp->set_stemmer($self->stemmer);
- $qp->set_stemming_strategy(STEM_SOME);
+ $qp->set_stemming_strategy(STEM_SOME());
$qp->set_max_wildcard_expansion(100);
$qp->add_valuerangeprocessor(
Search::Xapian::NumberValueRangeProcessor->new(YYYYMMDD, 'd:'));
$self->{query_parser} = $qp;
}
-# only used for NNTP server
-sub query_xover {
- my ($self, $beg, $end, $offset) = @_;
- $self->{over_ro}->query_xover($beg, $end, $offset);
-}
-
-sub query_ts {
- my ($self, $ts, $prev) = @_;
- $self->{over_ro}->query_ts($ts, $prev);
-}
-
sub lookup_article {
my ($self, $num) = @_;
$self->{over_ro}->get_art($num);
}
-sub next_by_mid {
- my ($self, $mid, $id, $prev) = @_;
- $self->{over_ro}->next_by_mid($mid, $id, $prev);
-}
-
-# normalize subjects so they are suitable as pathnames for URLs
-# XXX: consider for removal
-sub subject_path {
- my $subj = pop;
- $subj = subject_normalized($subj);
- $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
- lc($subj);
-}
-
-sub subject_normalized {
- my $subj = pop;
- $subj =~ s/\A\s+//s; # no leading space
- $subj =~ s/\s+\z//s; # no trailing space
- $subj =~ s/\s+/ /gs; # no redundant spaces
- $subj =~ s/\.+\z//; # no trailing '.'
- $subj =~ s/$REPLY_RE//igo; # remove reply prefix
- $subj;
-}
-
sub help {
my ($self) = @_;
$self->qp; # parse altids
die("Invalid indexlevel $ibx->{indexlevel}\n");
}
}
- } else { # v1
+ } else { # FIXME: old tests
$ibx = { mainrepo => $git_dir, version => 1 };
}
$ibx = PublicInbox::InboxWritable->new($ibx);
- require Search::Xapian::WritableDatabase;
my $self = bless {
mainrepo => $mainrepo,
-inbox => $ibx,
$self;
}
+# true if the indexlevel of this object matches $xapianlevels
+sub need_xapian ($) {
+	my ($self) = @_;
+	$self->{indexlevel} =~ $xapianlevels;
+}
+
sub _xdb_release {
my ($self) = @_;
- my $xdb = delete $self->{xdb} or croak 'not acquired';
- $xdb->close;
+ if (need_xapian($self)) { # _xdb_acquire sets no xdb when Xapian is not needed
+ my $xdb = delete $self->{xdb} or croak 'not acquired';
+ $xdb->close;
+ }
$self->lock_release if $self->{creat};
undef;
}
sub _xdb_acquire {
my ($self) = @_;
- croak 'already acquired' if $self->{xdb};
+ my $flag; # stays undef when Xapian is not needed
my $dir = $self->xdir;
- my $flag = Search::Xapian::DB_OPEN;
+ if (need_xapian($self)) {
+ croak 'already acquired' if $self->{xdb};
+ PublicInbox::Search::load_xapian();
+ require Search::Xapian::WritableDatabase; # only loaded when Xapian is needed
+ $flag = $self->{creat} ?
+ Search::Xapian::DB_CREATE_OR_OPEN() :
+ Search::Xapian::DB_OPEN();
+ }
if ($self->{creat}) {
require File::Path;
$self->lock_acquire;
- File::Path::mkpath($dir);
- $flag = Search::Xapian::DB_CREATE_OR_OPEN;
+
+ # don't create empty Xapian directories if we don't need Xapian
+ my $is_part = defined($self->{partition});
+ if (!$is_part || ($is_part && need_xapian($self))) { # always when no partition is set
+ File::Path::mkpath($dir);
+ }
}
+ return unless defined $flag; # no Xapian: nothing to open
$self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
}
$num = index_mm($self, $mime);
}
eval {
- if ($self->{indexlevel} =~ $xapianlevels) {
+ if (need_xapian($self)) {
$self->add_xapian($mime, $num, $oid, $mids, $mid0)
}
if (my $over = $self->{over}) {
# v1 only, where $mid is unique
sub remove_message {
my ($self, $mid) = @_;
- my $db = $self->{xdb};
$mid = mid_clean($mid);
if (my $over = $self->{over}) {
warn "<$mid> missing for removal from overview\n";
}
}
- return if $self->{indexlevel} !~ $xapianlevels;
+ return unless need_xapian($self);
+ my $db = $self->{xdb};
my $nr = 0;
eval {
batch_do($self, 'Q' . $mid, sub {
# MID is a hint in V2
sub remove_by_oid {
my ($self, $oid, $mid) = @_;
- my $db = $self->{xdb};
$self->{over}->remove_oid($oid, $mid) if $self->{over};
+ return unless need_xapian($self);
+ my $db = $self->{xdb};
+
# XXX careful, we cannot use batch_do here since we conditionally
# delete documents based on other factors, so we cannot call
# find_doc_ids twice.
my ($self, $mm) = @_;
my $lm = $mm->last_commit || '';
my $lx = '';
- if ($self->{indexlevel} =~ $xapianlevels) {
+ if (need_xapian($self)) {
$lx = $self->{xdb}->get_metadata('last_commit') || '';
} else {
$lx = $lm;
$self->{over}->disconnect;
$git->cleanup;
delete $self->{txn};
- $xdb->cancel_transaction;
+ $xdb->cancel_transaction if $xdb;
$xdb = _xdb_release($self);
# ensure we leak no FDs to "git log" with Xapian <= 1.2
}
$dbh->commit;
}
- if ($newest && $self->{indexlevel} =~ $xapianlevels) {
+ if ($newest && need_xapian($self)) {
my $cur = $xdb->get_metadata('last_commit');
if (need_update($self, $cur, $newest)) {
$xdb->set_metadata('last_commit', $newest);
$self->{-inbox}->with_umask(sub {
my $xdb = $self->{xdb} || $self->_xdb_acquire;
$self->{over}->begin_lazy if $self->{over};
- $xdb->begin_transaction;
+ $xdb->begin_transaction if $xdb;
$self->{txn} = 1;
$xdb;
});
my ($self) = @_;
delete $self->{txn} or return;
$self->{-inbox}->with_umask(sub {
- $self->{xdb}->commit_transaction;
+ if (my $xdb = $self->{xdb}) {
+ $xdb->commit_transaction;
+ }
$self->{over}->commit_lazy if $self->{over};
});
}
sub worker_done {
my ($self) = @_;
- die "$$ $0 xdb not released\n" if $self->{xdb};
+ if (need_xapian($self)) { # xdb is only checked when Xapian is in use
+ die "$$ $0 xdb not released\n" if $self->{xdb};
+ }
die "$$ $0 still in transaction\n" if $self->{txn};
}
package PublicInbox::SearchMsg;
use strict;
use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(subject_normalized);
use PublicInbox::MID qw/mid_clean mid_mime/;
use PublicInbox::Address;
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
sub _extract_mid { mid_clean(mid_mime($_[0]->{mime})) }
+# pattern of the reply prefix stripped by subject_normalized
+our $REPLY_RE = qr/^re:\s+/i;
+
+# Return a cleaned-up copy of the given subject; the steps below run
+# in order, so the reply prefix is removed last.
+sub subject_normalized ($) {
+	my ($subj) = @_;
+	for ($subj) {
+		s/\A\s+//s; # no leading space
+		s/\s+\z//s; # no trailing space
+		s/\s+/ /gs; # no redundant spaces
+		s/\.+\z//; # no trailing '.'
+		s/$REPLY_RE//igo; # remove reply prefix
+	}
+	$subj;
+}
+
1;
sub sres_top_html {
my ($ctx) = @_;
+ my $srch = $ctx->{-inbox}->search or
+ return PublicInbox::WWW::need($ctx, 'Search');
my $q = PublicInbox::SearchQuery->new($ctx->{qp});
my $x = $q->{x};
my $query = $q->{'q'};
my ($mset, $total, $err, $cb);
retry:
eval {
- $mset = $ctx->{srch}->query($query, $opts);
+ $mset = $srch->query($query, $opts);
$total = $mset->get_matches_estimated;
};
$err = $@;
my $pad = length("$total");
my $pfx = ' ' x $pad;
my $res = \($ctx->{-html_tip});
- my $srch = $ctx->{srch};
my $ibx = $ctx->{-inbox};
+ my $srch = $ibx->search;
my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
foreach my $m ($mset->items) {
my $rank = sprintf("%${pad}d", $m->get_rank + 1);
sub mset_thread {
my ($ctx, $mset, $q) = @_;
my %pct;
- my $srch = $ctx->{srch};
- my $msgs = $srch->retry_reopen(sub { [ map {
+ my $ibx = $ctx->{-inbox};
+ my $msgs = $ibx->search->retry_reopen(sub { [ map {
my $i = $_;
my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document);
$pct{$smsg->mid} = $i->get_percent;
$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds,
$ctx);
my $skel = search_nav_bot($mset, $q). "<pre>";
- my $ibx = $ctx->{-inbox};
$ctx->{-upfx} = '';
$ctx->{anchor_idx} = 1;
$ctx->{cur_level} = 0;
my $ibx = $ctx->{-inbox};
my @items = $mset->items;
$ctx->{search_query} = $q;
- my $srch = $ctx->{srch};
+ my $srch = $ibx->search;
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
while (my $x = shift @items) {
$x = load_doc_retry($srch, $x);
use PublicInbox::ViewDiff qw(flush_diff);
require POSIX;
use Time::Local qw(timegm);
-
+use PublicInbox::SearchMsg qw(subject_normalized);
use constant COLS => 72;
use constant INDENT => ' ';
use constant TCHILD => '` ';
my $ibx = $ctx->{-inbox};
my ($first, $more);
my $smsg;
- if (my $srch = $ibx->search) {
+ if (my $over = $ibx->over) {
my ($id, $prev);
- $smsg = $srch->next_by_mid($mid, \$id, \$prev);
+ $smsg = $over->next_by_mid($mid, \$id, \$prev);
$first = $ibx->msg_by_smsg($smsg) if $smsg;
if ($first) {
- my $next = $srch->next_by_mid($mid, \$id, \$prev);
+ my $next = $over->next_by_mid($mid, \$id, \$prev);
$more = [ $id, $prev, $next ] if $next;
}
return unless $first;
my $mid = $ctx->{mid};
my $ibx = $ctx->{-inbox};
$smsg = $ibx->smsg_mime($smsg);
- my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev);
+ my $next = $ibx->over->next_by_mid($mid, \$id, \$prev);
@$more = $next ? ($id, $prev, $next) : ();
if ($smsg) {
my $mime = $smsg->{mime};
# this is already inside a <pre>
sub index_entry {
my ($smsg, $ctx, $more) = @_;
- my $srch = $ctx->{srch};
my $subj = $smsg->subject;
my $mid_raw = $smsg->mid;
my $id = id_compress($mid_raw, 1);
sub thread_html {
my ($ctx) = @_;
my $mid = $ctx->{mid};
- my $srch = $ctx->{srch};
- my ($nr, $msgs) = $srch->get_thread($mid);
+ my $ibx = $ctx->{-inbox};
+ my ($nr, $msgs) = $ibx->over->get_thread($mid);
return missing_thread($ctx) if $nr == 0;
my $skel = '<hr><pre>';
$skel .= $nr == 1 ? 'only message in thread' : 'end of thread';
my $rootset = thread_results($ctx, $msgs);
# reduce hash lookups in pre_thread->skel_dump
- my $ibx = $ctx->{-inbox};
$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
walk_thread($rootset, $ctx, *pre_thread);
sub _msg_html_prepare {
my ($hdr, $ctx, $more, $nr) = @_;
- my $srch = $ctx->{srch} if $ctx;
my $atom = '';
+ my $over = $ctx->{-inbox}->over;
my $obfs_ibx = $ctx->{-obfs_ibx};
my $rv = '';
my $mids = mids($hdr);
} else {
$rv .= '<pre>';
}
- if ($srch) {
+ if ($over) {
$ctx->{-upfx} = '../';
}
my @title;
if (defined($v = $hdr->header('Subject')) && ($v ne '')) {
$v = ascii_html($v);
obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
- if ($srch) {
+ if ($over) {
$rv .= qq(Subject: <a\nhref="#r"\nid=t>$v</a>\n);
} else {
$rv .= "Subject: $v\n";
}
$title[0] = $v;
} else { # dummy anchor for thread skeleton at bottom of page
- $rv .= qq(<a\nhref="#r"\nid=t></a>) if $srch;
+ $rv .= qq(<a\nhref="#r"\nid=t></a>) if $over;
$title[0] = '(no subject)';
}
if (defined($v = $hdr->header('Date'))) {
$rv .= "(<a\nhref=\"raw\">raw</a>)\n";
}
}
- $rv .= _parent_headers($hdr, $srch);
+ $rv .= _parent_headers($hdr, $over);
$rv .= "\n";
}
sub thread_skel {
my ($dst, $ctx, $hdr, $tpfx) = @_;
- my $srch = $ctx->{srch};
my $mid = mids($hdr)->[0];
- my ($nr, $msgs) = $srch->get_thread($mid);
+ my $ibx = $ctx->{-inbox};
+ my ($nr, $msgs) = $ibx->over->get_thread($mid);
my $expand = qq(expand[<a\nhref="${tpfx}T/#u">flat</a>) .
qq(|<a\nhref="${tpfx}t/#u">nested</a>] ) .
qq(<a\nhref="${tpfx}t.mbox.gz">mbox.gz</a> ) .
my $subj = $hdr->header('Subject');
defined $subj or $subj = '';
$subj = '(no subject)' if $subj eq '';
- $ctx->{prev_subj} = [ split(/ /, $srch->subject_normalized($subj)) ];
+ $ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ];
$ctx->{cur} = $mid;
$ctx->{prev_attr} = '';
$ctx->{prev_level} = 0;
$ctx->{dst} = $dst;
# reduce hash lookups in skel_dump
- my $ibx = $ctx->{-inbox};
$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
walk_thread(thread_results($ctx, $msgs), $ctx, *skel_dump);
}
sub _parent_headers {
- my ($hdr, $srch) = @_;
+ my ($hdr, $over) = @_;
my $rv = '';
my $refs = references($hdr);
# do not display References: if search is present,
# we show the thread skeleton at the bottom, instead.
- return $rv if $srch;
+ return $rv if $over;
if (@$refs) {
- @$refs = map { linkify_ref_nosrch($_) } @$refs;
+ @$refs = map { linkify_ref_no_over($_) } @$refs;
$rv .= 'References: '. join("\n\t", @$refs) . "\n";
}
$rv;
sub html_footer {
my ($hdr, $standalone, $ctx, $rhref) = @_;
- my $srch = $ctx->{srch} if $ctx;
+ my $ibx = $ctx->{-inbox} if $ctx;
my $upfx = '../';
my $tpfx = '';
my $idx = $standalone ? " <a\nhref=\"$upfx\">index</a>" : '';
my $irt = '';
- if ($idx && $srch) {
+ if ($idx && $ibx->over) {
$idx .= "\n";
thread_skel(\$idx, $ctx, $hdr, $tpfx);
my ($next, $prev);
$irt .= $idx;
}
-sub linkify_ref_nosrch {
+sub linkify_ref_no_over {
my $v = PublicInbox::Hval->new_msgid($_[0]);
my $html = $v->as_html;
my $href = $v->{href};
# Subject is never undef, this mail was loaded from
# our Xapian which would've resulted in '' if it were
# really missing (and Filter rejects empty subjects)
- my @subj = split(/ /, $ctx->{srch}->subject_normalized($smsg->subject));
+ my @subj = split(/ /, subject_normalized($smsg->subject));
# remove common suffixes from the subject if it matches the previous,
# so we do not show redundant text at the end.
# returns 200 if done, 404 if not
sub acc_topic {
my ($ctx, $level, $node) = @_;
- my $srch = $ctx->{srch};
my $mid = $node->{id};
my $x = $node->{smsg} || $ctx->{-inbox}->smsg_by_mid($mid);
my ($subj, $ds);
my $topic;
if ($x) {
$subj = $x->subject;
- $subj = $srch->subject_normalized($subj);
+ $subj = subject_normalized($subj);
$subj = '(no subject)' if $subj eq '';
$ds = $x->ds;
if ($level == 0) {
my @out;
my $ibx = $ctx->{-inbox};
my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
- my $srch = $ctx->{srch};
# sort by recency, this allows new posts to "bump" old topics...
foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) {
my $level = $ex[$i];
my $subj = $ex[$i + 1];
$mid = delete $seen->{$subj};
- my @subj = split(/ /, $srch->subject_normalized($subj));
+ my @subj = split(/ /, subject_normalized($subj));
my @next_prev = @subj; # full copy
my $omit = dedupe_subject($prev_subj, \@subj, ' "');
$prev_subj = \@next_prev;
require PublicInbox::MIME;
require Digest::SHA;
require POSIX;
-
- foreach (qw(PublicInbox::Search PublicInbox::SearchView
+ eval {
+ require PublicInbox::Search;
+ PublicInbox::Search::load_xapian();
+ };
+ foreach (qw(PublicInbox::SearchView
PublicInbox::Mbox IO::Compress::Gzip
PublicInbox::NewsWWW)) {
eval "require $_;";
my ($ctx) = @_;
if ($ctx && $ctx->{mid}) {
require PublicInbox::ExtMsg;
- searcher($ctx);
return PublicInbox::ExtMsg::ext_msg($ctx);
}
r(404, 'Not Found');
sub get_index {
my ($ctx) = @_;
require PublicInbox::Feed;
- searcher($ctx);
if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
require PublicInbox::SearchView;
PublicInbox::SearchView::sres_top_html($ctx);
sub get_mid_html {
my ($ctx) = @_;
require PublicInbox::View;
- searcher($ctx);
PublicInbox::View::msg_page($ctx) || r404($ctx);
}
# /$INBOX/$MESSAGE_ID/t/
sub get_thread {
my ($ctx, $flat) = @_;
- searcher($ctx) or return need_search($ctx);
+ $ctx->{-inbox}->over or return need($ctx, 'Overview');
$ctx->{flat} = $flat;
require PublicInbox::View;
PublicInbox::View::thread_html($ctx);
$val;
}
-# search support is optional, returns undef if Xapian is not installed
-# or not configured for the given GIT_DIR
-sub searcher {
- my ($ctx) = @_;
- eval {
- require PublicInbox::Search;
- $ctx->{srch} = $ctx->{-inbox}->search;
- };
-}
-
-sub need_search {
- my ($ctx) = @_;
+sub need {
+ my ($ctx, $extra) = @_;
my $msg = <<EOF;
-<html><head><title>Search not available for this
-public-inbox</title><body><pre>Search is not available for this public-inbox
+<html><head><title>$extra not available for this
+public-inbox</title><body><pre>$extra is not available for this public-inbox
<a href="../">Return to index</a></pre></body></html>
EOF
[ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ];
# especially on older systems. Stick to zlib since that's what git uses.
sub get_thread_mbox {
my ($ctx, $sfx) = @_;
- my $srch = searcher($ctx) or return need_search($ctx);
+ my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview'); # need() builds a 501 response
require PublicInbox::Mbox;
- PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx);
+ PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx); # thread_mbox reads the thread via $over
}
# /$INBOX/$MESSAGE_ID/t.atom -> thread as Atom feed
sub get_thread_atom {
my ($ctx) = @_;
- searcher($ctx) or return need_search($ctx);
+ $ctx->{-inbox}->over or return need($ctx, 'Overview'); # need() builds a 501 response
require PublicInbox::Feed;
PublicInbox::Feed::generate_thread_atom($ctx);
}
sub mbox_results {
my ($ctx) = @_;
if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
- searcher($ctx) or return need_search($ctx);
+ $ctx->{-inbox}->search or return need($ctx, 'search');
require PublicInbox::SearchView;
return PublicInbox::SearchView::mbox_results($ctx);
}
my ($ctx, $inbox, $range) = @_;
invalid_inbox($ctx, $inbox) || eval {
require PublicInbox::Mbox;
- searcher($ctx);
PublicInbox::Mbox::emit_range($ctx, $range);
}
}
use warnings;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
my $usage = "public-inbox-index REPO_DIR";
-use PublicInbox::Config;
use PublicInbox::Admin qw(resolve_repo_dir);
+PublicInbox::Admin::require_or_die('-index');
+require PublicInbox::Config;
my $config = eval { PublicInbox::Config->new } || eval {
warn "public-inbox unconfigured for serving, indexing anyways...\n";
undef;
};
-eval { require PublicInbox::SearchIdx };
-if ($@) {
- print STDERR "Search::Xapian required for $0\n";
- exit 1;
-}
my $reindex;
my $prune;
my $jobs = undef;
+my $indexlevel;
my %opts = (
'--reindex' => \$reindex,
'--jobs|j=i' => \$jobs,
'--prune' => \$prune,
+ 'L|indexlevel=s' => \$indexlevel,
);
GetOptions(%opts) or die "bad command-line args\n$usage";
die "--jobs must be positive\n" if defined $jobs && $jobs < 0;
}
});
+my @inboxes;
+my $mods = {};
+
foreach my $dir (@dirs) {
- if (!ref($dir) && -f "$dir/inbox.lock") { # v2
- my $ibx = { mainrepo => $dir, name => 'unnamed' };
- $dir = PublicInbox::Inbox->new($ibx);
+ my $ibx = $dir;
+ if (!ref($ibx)) {
+ unless (-d $dir) {
+ die "$dir does not appear to be an inbox repository\n";
+ }
+ $ibx = PublicInbox::Inbox->new({
+ mainrepo => $dir,
+ name => 'unnamed',
+ indexlevel => $indexlevel,
+ version => -f "$dir/inbox.lock" ? 2 : 1,
+ });
+ } elsif (defined $indexlevel && !defined($ibx->{indexlevel})) {
+ # XXX: users can shoot themselves in the foot, with this...
+ $ibx->{indexlevel} = $indexlevel;
}
- index_dir($dir);
+ push @inboxes, $ibx;
+ PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
}
-sub index_dir {
+PublicInbox::Admin::require_or_die(keys %$mods);
+
+require PublicInbox::SearchIdx;
+index_inbox($_) for @inboxes;
+
+sub index_inbox {
my ($repo) = @_;
- if (!ref $repo && ! -d $repo) {
- die "$repo does not appear to be an inbox repository\n";
- }
if (ref($repo) && ($repo->{version} || 1) == 2) {
eval { require PublicInbox::V2Writable };
die "v2 requirements not met: $@\n" if $@;
use warnings;
my $usage = "public-inbox-init NAME REPO_DIR HTTP_URL ADDRESS [ADDRESS..]";
use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
-use PublicInbox::Config;
-use PublicInbox::Inbox;
+use PublicInbox::Admin;
+PublicInbox::Admin::require_or_die('-base');
+require PublicInbox::Config;
+require PublicInbox::Inbox;
use File::Temp qw/tempfile/;
use File::Basename qw/dirname/;
use File::Path qw/mkpath/;
'S|skip=i' => \$skip,
);
GetOptions(%opts) or usage();
+PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel;
my $name = shift @ARGV or usage();
my $mainrepo = shift @ARGV or usage();
my $http_url = shift @ARGV or usage();
name => $name,
version => $version,
-primary_address => $address[0],
+ indexlevel => $indexlevel,
});
if ($version >= 2) {
use strict;
use warnings;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-use PublicInbox::Config;
-use PublicInbox::MIME;
use PublicInbox::Admin qw(resolve_repo_dir);
-use PublicInbox::Filter::Base;
-*REJECT = *PublicInbox::Filter::Base::REJECT;
+# check_require() only returns the error hash; require_or_die() reports
+# missing modules in a readable form before the requires below can fail
+PublicInbox::Admin::require_or_die('-index');
+require PublicInbox::Filter::Base;
+require PublicInbox::Config;
+require PublicInbox::MIME;
+require PublicInbox::V2Writable;
-my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";
+{ no warnings 'once'; *REJECT = *PublicInbox::Filter::Base::REJECT }
-eval { require PublicInbox::V2Writable } or die
- "DBI, DBD::SQLite and Search::Xapian required for purge\n";
+my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";
my $config = eval { PublicInbox::Config->new };
my $cfgfile = PublicInbox::Config::default_file();
my ($all, $force);
}
}
+foreach my $ibx (@inboxes) {
+ my $lvl = $ibx->{indexlevel};
+ if (defined $lvl) {
+ PublicInbox::Admin::indexlevel_ok_or_die($lvl);
+ next;
+ }
+
+ # Undefined indexlevel, so `full'...
+ # Search::Xapian exists and the DB can be read, at least, fine
+ $ibx->search and next;
+
+ # it's possible for a Xapian directory to exist, but Search::Xapian
+ # to go missing/broken. Make sure it's purged in that case:
+ $ibx->over or die "no over.sqlite3 in $ibx->{mainrepo}\n";
+
+ # $ibx->{search} is populated by $ibx->over call
+ my $xdir_ro = $ibx->{search}->xdir(1);
+ my $npart = 0;
+ foreach my $part (<$xdir_ro/*>) {
+ if (-d $part && $part =~ m!/\d+\z!) {
+ my $bytes = 0;
+ $bytes += -s $_ foreach glob("$part/*");
+ $npart++ if $bytes;
+ }
+ }
+ if ($npart) {
+ PublicInbox::Admin::require_or_die('-search');
+ } else {
+ # somebody could "rm -r" all the Xapian directories;
+ # let them purge the overview, at least
+ $ibx->{indexlevel} ||= 'basic';
+ }
+}
+
my $data = do { local $/; scalar <STDIN> };
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
my $n_purged = 0;
# v2
SKIP: {
- for my $m (qw(DBD::SQLite Search::Xapian)) {
+ for my $m (qw(DBD::SQLite)) {
skip "$m missing", 5 unless eval "require $m";
}
use_ok 'PublicInbox::V2Writable';
my %cfg = (
"$cfgpfx.address" => $addr,
"$cfgpfx.mainrepo" => $maindir,
+ "$cfgpfx.indexlevel" => 'basic',
);
while (my ($k,$v) = each %cfg) {
is(0, system(qw(git config --file), $pi_config, $k, $v),
use_ok 'PublicInbox::Git';
use_ok 'PublicInbox::Import';
-use_ok 'Email::MIME';
-my $git = PublicInbox::Git->new($maindir);
-my $im = PublicInbox::Import->new($git, 'test', $addr);
+use_ok 'PublicInbox::Inbox';
+use_ok 'PublicInbox::V1Writable';
+use_ok 'PublicInbox::Config';
+my $cfg = PublicInbox::Config->new($pi_config);
+my $ibx = $cfg->lookup_name('test');
+my $im = PublicInbox::V1Writable->new($ibx);
{
local $ENV{HOME} = $home;
like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled");
my $indexed;
eval {
+ require DBD::SQLite;
require PublicInbox::SearchIdx;
- my $s = PublicInbox::SearchIdx->new($maindir, 1);
+ my $s = PublicInbox::SearchIdx->new($ibx, 1);
$s->index_sync;
$indexed = 1;
};
};
} else {
like($res->{head}, qr/^Status: 501 /, "search not available");
+ SKIP: { skip 'DBD::SQLite not available', 2 };
}
my $have_xml_feed = eval { require XML::Feed; 1 } if $indexed;
my $p = XML::Feed->parse(\($res->{body}));
is($p->format, "Atom", "parsed atom feed");
is(scalar $p->entries, 3, "parsed three entries");
+ } else {
+ SKIP: { skip 'DBD::SQLite or XML::Feed missing', 2 };
}
}
require_git(2.6);
my $this = (split('/', __FILE__))[-1];
-# TODO: remove Search::Xapian as a requirement for basic
-foreach my $mod (qw(DBD::SQLite Search::Xapian)) {
+foreach my $mod (qw(DBD::SQLite)) {
eval "require $mod";
plan skip_all => "$mod missing for $this" if $@;
}
$im->done;
# index master (required for v1)
- is(system($index, $ibx->{mainrepo}), 0, 'index master OK');
- my $ro_master = PublicInbox::Inbox->new({mainrepo => $ibx->{mainrepo}});
+ is(system($index, $ibx->{mainrepo}, "-L$level"), 0, 'index master OK');
+ my $ro_master = PublicInbox::Inbox->new({
+ mainrepo => $ibx->{mainrepo},
+ indexlevel => $level
+ });
my ($nr, $msgs) = $ro_master->recent;
is($nr, 1, 'only one message in master, so far');
is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed');
is(system($index, $mirror), 0, "v$v index mirror OK");
# read-only access
- my $ro_mirror = PublicInbox::Inbox->new({mainrepo => $mirror});
+ my $ro_mirror = PublicInbox::Inbox->new({
+ mainrepo => $mirror,
+ indexlevel => 'basic'
+ });
($nr, $msgs) = $ro_mirror->recent;
is($nr, 1, 'only one message, so far');
is($msgs->[0]->{mid}, 'm@1', 'read first message');
['m@1','m@2'], 'got both messages in mirror');
# incremental index master (required for v1)
- is(system($index, $ibx->{mainrepo}), 0, 'index master OK');
+ is(system($index, $ibx->{mainrepo}, "-L$level"), 0, 'index master OK');
($nr, $msgs) = $ro_master->recent;
is($nr, 2, '2nd message seen in master');
is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs],
is($nr, 1, '2nd message gone from mirror');
is_deeply([map { $_->{mid} } @$msgs], ['m@1'],
'message unavailable in mirror');
+
+ if ($v == 2 && $level eq 'basic') {
+ is_deeply([glob("$ibx->{mainrepo}/xap*/?/")], [],
+ 'no Xapian partition directories for v2 basic');
+ }
}
# we can probably cull some other tests and put full/medium tests, here
use warnings;
use Test::More;
-foreach my $mod (qw(DBD::SQLite Search::Xapian Data::Dumper)) {
+foreach my $mod (qw(DBD::SQLite Data::Dumper)) {
eval "require $mod";
plan skip_all => "$mod missing for nntp.t" if $@;
}
use strict;
use warnings;
use Test::More;
-foreach my $mod (qw(DBD::SQLite Search::Xapian)) {
+foreach my $mod (qw(DBD::SQLite)) {
eval "require $mod";
plan skip_all => "$mod missing for nntpd.t" if $@;
}
name => $group,
version => $version,
-primary_address => $addr,
+ indexlevel => 'basic',
};
$ibx = PublicInbox::Inbox->new($ibx);
{
local $ENV{HOME} = $home;
my @cmd = ($init, $group, $mainrepo, 'http://example.com/', $addr);
- push @cmd, "-V$version";
+ push @cmd, "-V$version", '-Lbasic';
is(system(@cmd), 0, 'init OK');
is(system(qw(git config), "--file=$home/.public-inbox/config",
"publicinbox.$group.newsgroup", $group),
if ($version == 2) {
$im = PublicInbox::V2Writable->new($ibx);
} elsif ($version == 1) {
- my $git = PublicInbox::Git->new($mainrepo);
- $im = PublicInbox::Import->new($git, 'test', $addr);
+ use_ok 'PublicInbox::V1Writable';
+ $im = PublicInbox::V1Writable->new($ibx);
} else {
die "unsupported version: $version";
}
is($rdr, waitpid($rdr, 0), 'reader done');
is($? >> 8, 0, 'no errors');
}
+ SKIP: {
+ my @of = `lsof -p $pid 2>/dev/null`;
+ skip('lsof broken', 1) if (!scalar(@of) || $?);
+ my @xap = grep m!Search/Xapian!, @of;
+ is_deeply(\@xap, [], 'Xapian not loaded in nntpd');
+ }
{
setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1);
syswrite($s, 'HDR List-id 1-');
use Test::More;
use File::Temp qw/tempdir/;
use Compress::Zlib qw(compress);
-# FIXME: allow using Over w/o Xapian
-foreach my $mod (qw(DBD::SQLite Search::Xapian)) {
+foreach my $mod (qw(DBD::SQLite)) {
eval "require $mod";
plan skip_all => "$mod missing for over.t" if $@;
}
use PublicInbox::MIME;
use PublicInbox::Config;
use PublicInbox::WWW;
-my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
+my @mods = qw(DBD::SQLite HTTP::Request::Common Plack::Test
URI::Escape Plack::Builder);
foreach my $mod (@mods) {
eval "require $mod";
name => 'bad-mids',
version => 2,
-primary_address => 'test@example.com',
+ indexlevel => 'basic',
};
$ibx = PublicInbox::Inbox->new($ibx);
my $im = PublicInbox::V2Writable->new($ibx, 1);
use Email::MIME;
use File::Temp qw/tempdir/;
use PublicInbox::Config;
-my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Search::Xapian
- DBD::SQLite);
+my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape DBD::SQLite);
foreach my $mod (@mods) {
eval "require $mod";
plan skip_all => "$mod missing for psgi_scan_all.t" if $@;
mainrepo => $mainrepo,
name => "test-$i",
version => 2,
+ indexlevel => 'basic',
-primary_address => $addr,
};
my $ibx = PublicInbox::Inbox->new($opt);
use PublicInbox::Config;
use PublicInbox::WWW;
use bytes (); # only for bytes::length
-my @mods = qw(PublicInbox::SearchIdx HTTP::Request::Common Plack::Test
+my @mods = qw(Search::Xapian HTTP::Request::Common Plack::Test
URI::Escape Plack::Builder);
foreach my $mod (@mods) {
eval "require $mod";
plan skip_all => "$mod missing for psgi_search.t" if $@;
}
-use_ok $_ foreach @mods;
+use_ok $_ foreach (@mods, qw(PublicInbox::SearchIdx));
my $tmpdir = tempdir('pi-psgi-search.XXXXXX', TMPDIR => 1, CLEANUP => 1);
my $git_dir = "$tmpdir/a.git";
use File::Temp qw/tempdir/;
require './t/common.perl';
require_git(2.6);
-my @mods = qw(IPC::Run DBI DBD::SQLite Search::Xapian);
+my @mods = qw(IPC::Run DBI DBD::SQLite);
foreach my $mod (@mods) {
eval "require $mod";
plan skip_all => "missing $mod for t/purge.t" if $@;
use File::Temp qw/tempdir/;
use PublicInbox::MID qw(mids);
use Email::MIME;
-eval { require PublicInbox::SearchIdx; };
-plan skip_all => "Xapian missing for search" if $@;
+eval { require Search::Xapian };
+plan skip_all => "Search::Xapian missing for search" if $@;
+require PublicInbox::SearchIdx;
my $tmpdir = tempdir('pi-search-thr-index.XXXXXX', TMPDIR => 1, CLEANUP => 1);
my $git_dir = "$tmpdir/a.git";
use strict;
use warnings;
use Test::More;
-eval { require PublicInbox::SearchIdx; };
-plan skip_all => "Xapian missing for search" if $@;
+eval { require Search::Xapian };
+plan skip_all => "Search::Xapian missing for search" if $@;
+require PublicInbox::SearchIdx;
use File::Temp qw/tempdir/;
use Email::MIME;
my $tmpdir = tempdir('pi-search-XXXXXX', TMPDIR => 1, CLEANUP => 1);
my ($root_id, $last_id);
is(0, system(qw(git init --shared -q --bare), $git_dir), "git init (main)");
-eval { PublicInbox::Search->new($git_dir) };
+eval { PublicInbox::Search->new($git_dir)->xdb };
ok($@, "exception raised on non-existent DB");
my $rw = PublicInbox::SearchIdx->new($git_dir, 1);
$rw_commit->();
$ro->reopen;
- my $t = $ro->get_thread('root@s');
+ my $t = $ro->{over_ro}->get_thread('root@s');
is(scalar(@$t), 4, "got all 4 mesages in thread");
my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid);
@res = filter_mids($t);
if (scalar(@$n) >= 1) {
my $mid = $n->[0]->mid;
my ($id, $prev);
- $art = $ro->next_by_mid($mid, \$id, \$prev);
+ $art = $ro->{over_ro}->next_by_mid($mid, \$id, \$prev);
ok($art, 'article exists in OVER DB');
}
$rw->unindex_blob($amsg);
delete $ibx->{mm};
is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
- my $mset = $ibx->search->reopen->query('hello world', {mset=>1});
- is($mset->size, 0, "no Xapian search results");
+ # two-argument isnt() would treat the label as the expected value and
+ # always pass; assert directly that no searcher is returned
+ ok(!$ibx->search, 'no search for basic');
my ($min, $max) = $ibx->mm->minmax;
is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged');
delete $ibx->{mm};
is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged');
is($ibx->mm->num_highwater, 10, 'num_highwater as expected');
- my $mset = $ibx->search->query('freedom', {mset=>1});
- is($mset->size, 0, "search fails on indexlevel='basic'");
+
+ # two-argument isnt() would treat the label as the expected value and
+ # always pass; assert directly that no searcher is returned
+ ok(!$ibx->search, 'no search for basic');
+
for (<"$xap/*/*">) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ }
ok($sizes{medium} > $sizes{basic}, 'basic is smaller than medium');
ok($im->add($mime), 'add excessively long References');
$im->barrier;
- my $msgs = $ibx->search->reopen->get_thread('x'x244);
+ my $msgs = $ibx->search->{over_ro}->get_thread('x'x244);
is(2, scalar(@$msgs), 'got both messages');
is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid');
is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref');
env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'http' },
-inbox => Plack::Util::inline_object(
name => 'test',
+ over => sub { undef },
search => sub { undef },
base_url => sub { 'http://example.com/' },
cloneurl => sub {[]},
use PublicInbox::Config;
require './t/common.perl';
require_git(2.6);
-my @mods = qw(Filesys::Notify::Simple PublicInbox::V2Writable);
+my @mods = qw(Search::Xapian DBD::SQLite Filesys::Notify::Simple);
foreach my $mod (@mods) {
eval "require $mod";
plan skip_all => "$mod missing for watch_maildir_v2.t" if $@;
}
-
+require PublicInbox::V2Writable;
my $tmpdir = tempdir('watch_maildir-v2-XXXXXX', TMPDIR => 1, CLEANUP => 1);
my $mainrepo = "$tmpdir/v2";
my $maildir = "$tmpdir/md";