sub detect_indexlevel ($) {
my ($ibx) = @_;
- # brand new or never before indexed inboxes default to full
- return 'full' unless $ibx->over;
- delete $ibx->{over}; # don't leave open FD lying around
+ my $over = $ibx->over;
+ my $srch = $ibx->search;
+ delete @$ibx{qw(over search)}; # don't leave open FDs lying around
+ # brand new or never before indexed inboxes default to full
+ return 'full' unless $over;
my $l = 'basic';
- my $srch = $ibx->search or return $l;
- delete $ibx->{search}; # don't leave open FD lying around
+ return $l unless $srch;
if (my $xdb = $srch->xdb) {
$l = 'full';
my $m = $xdb->get_metadata('indexlevel');
$ibx->{inboxdir} has unexpected indexlevel in Xapian: $m
}
+ $ibx->{-skip_docdata} = 1 if $xdb->get_metadata('skip_docdata');
}
$l;
}
}
}
if (@old) {
- die "inboxes $min_ver inboxes not supported by $0\n\t",
+ die "-V$min_ver inboxes not supported by $0\n\t",
join("\n\t", @old), "\n";
}
@ibxs;
sub index_inbox {
my ($ibx, $im, $opt) = @_;
my $jobs = delete $opt->{jobs} if $opt;
+ if (my $pr = $opt->{-progress}) {
+ $pr->("indexing $ibx->{inboxdir} ...\n");
+ }
local %SIG = %SIG;
setup_signals(\&index_terminate, $ibx);
if (ref($ibx) && $ibx->version == 2) {
$v2w->{parallel} = 0;
} else {
my $n = $v2w->{shards};
- if ($jobs != ($n + 1) && !$opt->{reshard}) {
+ if ($jobs < ($n + 1) && !$opt->{reshard}) {
warn
-"Unable to respect --jobs=$jobs, inbox was created with $n shards\n";
+"Unable to respect --jobs=$jobs on index, inbox was created with $n shards\n";
}
}
}
# same unit factors as git:
sub parse_unsigned ($) {
- my ($max_size) = @_;
+ my ($val) = @_; # scalar ref; the referenced value is rewritten in place on success
- $$max_size =~ /\A([0-9]+)([kmg])?\z/i or return;
+ $$val =~ /\A([0-9]+)([kmg])?\z/i or return; # decimal digits plus optional k/m/g suffix (case-insensitive); bare return leaves $$val untouched on mismatch
my ($n, $unit_factor) = ($1, $2 // '');
my %u = ( k => 1024, m => 1024**2, g => 1024**3 );
- $$max_size = $n * ($u{lc($unit_factor)} // 1);
+ $$val = $n * ($u{lc($unit_factor)} // 1); # no suffix => multiply by 1
1;
}
+sub index_prepare ($$) { # ($opt, $cfg): validates and normalizes indexing options in %$opt; returns an env hashref, or undef when batch_size is unset
+ my ($opt, $cfg) = @_;
+ my $env; # stays undef unless batch_size is in effect (see below)
+ if ($opt->{compact}) {
+ require PublicInbox::Xapcmd; # loaded lazily, only when compaction was requested
+ PublicInbox::Xapcmd::check_compact(); # NOTE(review): presumably dies when compaction is unavailable -- confirm in Xapcmd
+ $opt->{compact_opt} = { -coarse_lock => 1, compact => 1 };
+ if (defined(my $jobs = $opt->{jobs})) { # forward jobs to compaction only when it was given
+ $opt->{compact_opt}->{jobs} = $jobs;
+ }
+ }
+ for my $k (qw(max_size batch_size)) { # size options; values may carry a k/m/g suffix
+ my $git_key = "publicInbox.index".ucfirst($k);
+ $git_key =~ s/_([a-z])/\U$1/g; # snake_case -> camelCase, e.g. publicInbox.indexMaxSize
+ defined(my $v = $opt->{$k} // $cfg->{lc($git_key)}) or next; # value in %$opt takes precedence; $cfg is looked up by the lowercased key
+ parse_unsigned(\$v) or die "`$git_key=$v' not parsed\n"; # on success $v now holds the expanded number
+ $v > 0 or die "`$git_key=$v' must be positive\n"; # zero is rejected as well
+ $opt->{$k} = $v; # store the normalized numeric value back for later use
+ }
+
+ # out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit
+ # https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html
+ $opt->{batch_size} and
+ $env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' }; # 2**32 - 1, the largest unsigned 32-bit value
+
+ for my $k (qw(sequential_shard)) { # boolean options, resolved the same way as the size options above
+ my $git_key = "publicInbox.index".ucfirst($k);
+ $git_key =~ s/_([a-z])/\U$1/g; # yields publicInbox.indexSequentialShard
+ defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next;
+ defined(my $v = $cfg->git_bool($s)) # an undef result is treated as "not a recognizable boolean"
+ or die "`$git_key=$s' not boolean\n";
+ $opt->{$k} = $v;
+ }
+ $env; # implicit return value
+}
+
1;