use Carp qw(croak);
use POSIX qw(strftime);
use PublicInbox::OverIdx;
-use PublicInbox::Spawn qw(spawn);
+use PublicInbox::Spawn qw(spawn nodatacow_dir);
use PublicInbox::Git qw(git_unquote);
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size nodatacow_dir);
+our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size);
my $X = \%PublicInbox::Search::X;
my ($DB_CREATE_OR_OPEN, $DB_OPEN);
our $DB_NO_SYNC = 0;
-our $BATCH_BYTES = defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
- 0x7fffffff : 1_000_000;
+our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : 1_000_000;
use constant DEBUG => !!$ENV{DEBUG};
my $xapianlevels = qr/\A(?:full|medium)\z/;
}, $class;
$self->xpfx_init;
$self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium';
+ if ($ibx->{-skip_docdata}) {
+ $self->{-set_skip_docdata_once} = 1;
+ $self->{-skip_docdata} = 1;
+ }
$ibx->umask_prepare;
if ($version == 1) {
$self->{lock_path} = "$inboxdir/ssoma.lock";
my $dir = $self->xdir;
$self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
$self->{over}->{-no_fsync} = 1 if $ibx->{-no_fsync};
- $self->{index_max_size} = $ibx->{index_max_size};
} elsif ($version == 2) {
defined $shard or die "shard is required for v2\n";
# shard is a number
1;
}
-sub nodatacow_dir ($) {
- my ($dir) = @_;
- opendir my $dh, $dir or die "opendir($dir): $!\n";
- PublicInbox::Spawn::set_nodatacow(fileno($dh));
-}
-
sub idx_acquire {
my ($self) = @_;
my $flag;
msg_iter($eml, \&index_xapian, [ $self, $doc ]);
index_ids($self, $doc, $eml, $mids);
- $smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP
- PublicInbox::OverIdx::parse_references($smsg, $eml, $mids);
- my $data = $smsg->to_doc_data;
- $doc->set_data($data);
+
+ # by default, we maintain compatibility with v1.5.0 and earlier
+ # by writing to docdata.glass; users who never expect to downgrade
+ # can use --skip-docdata
+ if (!$self->{-skip_docdata}) {
+ # WWW doesn't need {to} or {cc}, only NNTP
+ $smsg->{to} = $smsg->{cc} = '';
+ PublicInbox::OverIdx::parse_references($smsg, $eml, $mids);
+ my $data = $smsg->to_doc_data;
+ $doc->set_data($data);
+ }
+
if (my $altid = $self->{-altid}) {
foreach my $alt (@$altid) {
my $pfx = $alt->{xprefix};
# called by public-inbox-index
sub index_sync {
- my ($self, $opts) = @_;
- delete $self->{lock_path} if $opts->{-skip_lock};
- $self->{ibx}->with_umask(\&_index_sync, $self, $opts);
- if ($opts->{reindex}) {
- my %again = %$opts;
+ my ($self, $opt) = @_;
+ delete $self->{lock_path} if $opt->{-skip_lock};
+ $self->{ibx}->with_umask(\&_index_sync, $self, $opt);
+ if ($opt->{reindex}) {
+ my %again = %$opt;
delete @again{qw(rethread reindex)};
index_sync($self, \%again);
}
sub check_size { # check_async cb for -index --max-size=...
my ($oid, $type, $size, $arg, $git) = @_;
(($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}";
- if ($size <= $arg->{index_max_size}) {
+ if ($size <= $arg->{max_size}) {
$git->cat_async($oid, $arg->{index_oid}, $arg);
} else {
- warn "W: skipping $oid ($size > $arg->{index_max_size})\n";
+ warn "W: skipping $oid ($size > $arg->{max_size})\n";
}
}
$self->{mm}->last_commit($newest);
}
} else {
- ${$sync->{max}} = $BATCH_BYTES;
+ ${$sync->{max}} = $self->{batch_bytes};
}
$self->{mm}->{dbh}->commit;
sub process_stack {
my ($self, $sync, $stk) = @_;
my $git = $self->{ibx}->git;
- my $max = $BATCH_BYTES;
+ my $max = $self->{batch_bytes};
my $nr = 0;
$sync->{nr} = \$nr;
$sync->{max} = \$max;
$git->cat_async($oid, \&unindex_both, $self);
}
}
- if ($sync->{index_max_size} = $self->{ibx}->{index_max_size}) {
+ if ($sync->{max_size} = $sync->{-opt}->{max_size}) {
$sync->{index_oid} = \&index_both;
}
while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
if ($f eq 'm') {
my $arg = { %$sync, autime => $at, cotime => $ct };
- if ($sync->{index_max_size}) {
+ if ($sync->{max_size}) {
$git->check_async($oid, \&check_size, $arg);
} else {
$git->cat_async($oid, \&index_both, $arg);
# indexes all unindexed messages (v1 only)
sub _index_sync {
- my ($self, $opts) = @_;
- my $tip = $opts->{ref} || 'HEAD';
+ my ($self, $opt) = @_;
+ my $tip = $opt->{ref} || 'HEAD';
my $git = $self->{ibx}->git;
+ $self->{batch_bytes} = $opt->{batch_size} // $BATCH_BYTES;
$git->batch_prepare;
- my $pr = $opts->{-progress};
- my $sync = { reindex => $opts->{reindex}, -opt => $opts };
+ my $pr = $opt->{-progress};
+ my $sync = { reindex => $opt->{reindex}, -opt => $opt };
my $xdb = $self->begin_txn_lazy;
- $self->{over}->rethread_prepare($opts);
+ $self->{over}->rethread_prepare($opt);
my $mm = _msgmap_init($self);
if ($sync->{reindex}) {
my $last = $mm->last_commit;
# store 'indexlevel=medium' and/or 'skip_docdata=1' once in v2 shard=0
# and v1 (only one shard); shards >0 never write this metadata.
# The indexlevel metadata is read by Admin::detect_indexlevel:
-sub set_indexlevel {
+sub set_metadata_once {
my ($self) = @_;
- if (!$self->{shard} && # undef or 0, not >0
- delete($self->{-set_indexlevel_once})) {
- my $xdb = $self->{xdb};
+ return if $self->{shard}; # only continue if undef or 0, not >0
+ my $xdb = $self->{xdb};
+
+ if (delete($self->{-set_indexlevel_once})) {
my $level = $xdb->get_metadata('indexlevel');
if (!$level || $level ne 'medium') {
$xdb->set_metadata('indexlevel', 'medium');
}
}
+ if (delete($self->{-set_skip_docdata_once})) {
+ $xdb->get_metadata('skip_docdata') or
+ $xdb->set_metadata('skip_docdata', '1');
+ }
}
sub _commit_txn {
my ($self) = @_;
if (my $xdb = $self->{xdb}) {
- set_indexlevel($self);
+ set_metadata_once($self);
$xdb->commit_transaction;
}
$self->{over}->commit_lazy if $self->{over};