-# Copyright (C) 2015-2018 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# based on notmuch, but with no concept of folders, files or flags
#
use base qw(PublicInbox::Search PublicInbox::Lock);
use PublicInbox::MIME;
use PublicInbox::InboxWritable;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mids/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mids_for_index/;
use PublicInbox::MsgIter;
use Carp qw(croak);
use POSIX qw(strftime);
use PublicInbox::OverIdx;
use PublicInbox::Spawn qw(spawn);
use PublicInbox::Git qw(git_unquote);
-
+my $X = \%PublicInbox::Search::X;
+my ($DB_CREATE_OR_OPEN, $DB_OPEN);
use constant {
BATCH_BYTES => defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
0x7fffffff : 1_000_000,
my $xapianlevels = qr/\A(?:full|medium)\z/;
sub new {
- my ($class, $ibx, $creat, $part) = @_;
+ my ($class, $ibx, $creat, $shard) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
my $levels = qr/\A(?:full|medium|basic)\z/;
- my $mainrepo = $ibx->{mainrepo};
+ my $inboxdir = $ibx->{inboxdir};
my $version = $ibx->{version} || 1;
my $indexlevel = 'full';
my $altid = $ibx->{altid};
}
$ibx = PublicInbox::InboxWritable->new($ibx);
my $self = bless {
- mainrepo => $mainrepo,
+ inboxdir => $inboxdir,
-inbox => $ibx,
git => $ibx->git,
-altid => $altid,
}, $class;
$ibx->umask_prepare;
if ($version == 1) {
- $self->{lock_path} = "$mainrepo/ssoma.lock";
+ $self->{lock_path} = "$inboxdir/ssoma.lock";
my $dir = $self->xdir;
$self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
} elsif ($version == 2) {
- defined $part or die "partition is required for v2\n";
- # partition is a number
- $self->{partition} = $part;
+ defined $shard or die "shard is required for v2\n";
+ # shard is a number
+ $self->{shard} = $shard;
$self->{lock_path} = undef;
} else {
die "unsupported inbox version=$version\n";
undef;
}
+# Load the writable half of the Xapian bindings on first use.
+#
+# Returns 1 once the bindings are usable.  Returns false (bare return)
+# when PublicInbox::Search::load_xapian() fails.  Dies when the
+# WritableDatabase class cannot be required or when either DB_* flag
+# constant cannot be resolved.
+#
+# Side effects on success:
+# - sortable_serialise() is aliased into this package (used by add_val)
+# - $DB_CREATE_OR_OPEN and $DB_OPEN are set (used by _xdb_acquire)
+# - $X gains the Document, TermGenerator and WritableDatabase class names
+sub load_xapian_writable () {
+	return 1 if $X->{WritableDatabase};
+	PublicInbox::Search::load_xapian() or return;
+	my $xap = $PublicInbox::Search::Xap;
+	my %cls = map { $_ => $xap.'::'.$_ }
+			qw(Document TermGenerator WritableDatabase);
+	eval 'require '.$cls{WritableDatabase} or die;
+	*sortable_serialise = $xap.'::sortable_serialise';
+
+	# an undefined flag would make _xdb_acquire return without a DB,
+	# so fail loudly here instead
+	$DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
+	defined($DB_CREATE_OR_OPEN) or
+		die "${xap}::DB_CREATE_OR_OPEN unavailable: $@";
+	$DB_OPEN = eval($xap.'::DB_OPEN()');
+	defined($DB_OPEN) or die "${xap}::DB_OPEN unavailable: $@";
+
+	# publish the class names last: $X->{WritableDatabase} doubles as
+	# the "already loaded" guard above, so it must not become true
+	# until every step has succeeded (a caller may trap our die and
+	# call us again)
+	$X->{$_} = $cls{$_} for keys %cls;
+	1;
+}
+
+
sub _xdb_acquire {
my ($self) = @_;
my $flag;
my $dir = $self->xdir;
if (need_xapian($self)) {
croak 'already acquired' if $self->{xdb};
- PublicInbox::Search::load_xapian();
- require Search::Xapian::WritableDatabase;
- $flag = $self->{creat} ?
- Search::Xapian::DB_CREATE_OR_OPEN() :
- Search::Xapian::DB_OPEN();
+ load_xapian_writable();
+ $flag = $self->{creat} ? $DB_CREATE_OR_OPEN : $DB_OPEN;
}
if ($self->{creat}) {
require File::Path;
$self->lock_acquire;
# don't create empty Xapian directories if we don't need Xapian
- my $is_part = defined($self->{partition});
- if (!$is_part || ($is_part && need_xapian($self))) {
+ my $is_shard = defined($self->{shard});
+ if (!$is_shard || ($is_shard && need_xapian($self))) {
File::Path::mkpath($dir);
}
}
return unless defined $flag;
- my $xdb = eval { Search::Xapian::WritableDatabase->new($dir, $flag) };
+ my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
if ($@) {
die "Failed opening $dir: ", $@;
}
+# Encode $num with sortable_serialise() and store the result on $doc
+# via add_value() under $col.
sub add_val ($$$) {
 my ($doc, $col, $num) = @_;
- $num = Search::Xapian::sortable_serialise($num);
+ $num = sortable_serialise($num);
 $doc->add_value($col, $num);
}
$in_diff = $self->index_old_diff_fn(\%seen, $fa, $fb,
$xnq);
} elsif (m!^--- ("?a/.+)!) {
- my $fn = (split('/', git_unquote($1), 2))[1];
+ my $fn = $1;
+ $fn = (split('/', git_unquote($fn), 2))[1];
$seen{$fn}++ or $self->index_diff_inc($fn, 'XDFN', $xnq);
$in_diff = 1;
} elsif (m!^\+\+\+ ("?b/.+)!) {
- my $fn = (split('/', git_unquote($1), 2))[1];
+ my $fn = $1;
+ $fn = (split('/', git_unquote($fn), 2))[1];
$seen{$fn}++ or $self->index_diff_inc($fn, 'XDFN', $xnq);
$in_diff = 1;
} elsif (/^--- (\S+)/) {
/^Binary files .* differ/) {
push @xnq, $_;
} elsif ($_ eq '') {
- $in_diff = undef;
+ # a blank line may still be diff context: the leading
+ # space of an empty context line may be stripped by an
+ # MUA or even GNU diff(1). "git apply" treats a bare
+ # "\n" as diff context, too
} else {
push @xnq, $_;
warn "non-diff line: $_\n" if DEBUG && $_ ne '';
sub add_xapian ($$$$$) {
my ($self, $mime, $num, $oid, $mids, $mid0) = @_;
my $smsg = PublicInbox::SearchMsg->new($mime);
- my $doc = Search::Xapian::Document->new;
+ my $doc = $X->{Document}->new;
my $subj = $smsg->subject;
add_val($doc, PublicInbox::Search::TS(), $smsg->ts);
my @ds = gmtime($smsg->ds);
sub add_message {
# mime = Email::MIME object
my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
- my $mids = mids($mime->header_obj);
+ my $mids = mids_for_index($mime->header_obj);
$mid0 = $mids->[0] unless defined $mid0; # v1 compatibility
unless (defined $num) { # v1
$self->_msgmap_init;
my ($head, $tail) = $self->find_doc_ids($termval);
return if $head == $tail;
my @ids;
- for (; $head != $tail && @ids < $batch_size; $head->inc) {
+ for (; $head != $tail && @ids < $batch_size; $head++) {
push @ids, $head->get_docid;
}
$cb->(\@ids);
# there is only ONE element in @delete unless we
# have bugs in our v2writable deduplication check
my @delete;
- for (; $head != $tail; $head->inc) {
+ for (; $head != $tail; $head++) {
my $docid = $head->get_docid;
my $doc = $db->get_document($docid);
my $smsg = PublicInbox::SearchMsg->wrap($mid);
my $tg = $self->{term_generator};
return $tg if $tg;
- $tg = Search::Xapian::TermGenerator->new;
+ $tg = $X->{TermGenerator}->new;
$tg->set_stemmer($self->stemmer);
$self->{term_generator} = $tg;
$newest ||= $latest;
}
}
+ close($log) or die "git log failed: \$?=$?";
# get the leftovers
foreach my $blob (keys %D) {
my $mime = do_cat_mail($git, $blob, \$bytes) or next;
die "BUG: _msgmap_init is only for v1\n" if $self->{version} != 1;
$self->{mm} ||= eval {
require PublicInbox::Msgmap;
- PublicInbox::Msgmap->new($self->{mainrepo}, 1);
+ PublicInbox::Msgmap->new($self->{inboxdir}, 1);
};
}
--no-notes --no-color --no-renames
--diff-filter=AM), $range);
++$fcount while <$fh>;
- close $fh;
+ close $fh or die "git log failed: \$?=$?";
my $high = $self->{mm}->num_highwater;
$pr->("$fcount\n") if $pr; # continue previous line
$self->{ntodo} = $fcount;
my $xdb = $self->begin_txn_lazy;
my $mm = _msgmap_init($self);
do {
- $xlog = undef;
+ if ($xlog) {
+ close($xlog) or die "git log failed: \$?=$?";
+ $xlog = undef;
+ }
$last_commit = _last_x_commit($self, $mm);
$lx = reindex_from($opts->{reindex}, $last_commit);
sub remote_remove {
my ($self, $oid, $mid) = @_;
if (my $w = $self->{w}) {
- # triggers remove_by_oid in a partition
+ # triggers remove_by_oid in a shard
print $w "D $oid $mid\n" or die "failed to write remove $!";
} else {
$self->begin_txn_lazy;
$self->{-inbox}->with_umask(sub {
if (my $xdb = $self->{xdb}) {
- # store 'indexlevel=medium' in v2 part=0 and v1 (only part)
+ # store 'indexlevel=medium' in v2 shard=0 and
+ # v1 (only one shard)
# This metadata is read by Admin::detect_indexlevel:
- if (!$self->{partition} # undef or 0, not >0
+ if (!$self->{shard} # undef or 0, not >0
&& $self->{indexlevel} eq 'medium') {
$xdb->set_metadata('indexlevel', 'medium');
}