-# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# common stuff for administrative command-line tools
# Unstable internal API
package PublicInbox::Admin;
use strict;
-use warnings;
-use Cwd 'abs_path';
-use base qw(Exporter);
-our @EXPORT_OK = qw(resolve_repo_dir);
+use parent qw(Exporter);
+our @EXPORT_OK = qw(setup_signals);
use PublicInbox::Config;
use PublicInbox::Inbox;
use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Eml;
+*rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed;
-sub resolve_repo_dir {
+sub setup_signals {
+ my ($cb, $arg) = @_; # optional
+ require POSIX;
+
+ # we call exit() here instead of _exit() so DESTROY methods
+ # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap)
+ $SIG{INT} = $SIG{HUP} = $SIG{PIPE} = $SIG{TERM} = sub {
+ my ($sig) = @_;
+ # https://www.tldp.org/LDP/abs/html/exitcodes.html
+ eval { $cb->($sig, $arg) } if $cb;
+ $sig = 'SIG'.$sig;
+ exit(128 + POSIX->$sig);
+ };
+}
+
+sub resolve_eidxdir {
+ my ($cd) = @_;
+ my $try = $cd // '.';
+ my $root_dev_ino;
+ while (1) { # favor v2, first
+ if (-f "$try/ei.lock") {
+ return rel2abs_collapsed($try);
+ } elsif (-d $try) {
+ my @try = stat _;
+ $root_dev_ino //= do {
+ my @root = stat('/') or die "stat /: $!\n";
+ "$root[0]\0$root[1]";
+ };
+ return undef if "$try[0]\0$try[1]" eq $root_dev_ino;
+ $try .= '/..'; # continue, cd up
+ } else {
+ die "`$try' is not a directory\n";
+ }
+ }
+}
+
+sub resolve_inboxdir {
my ($cd, $ver) = @_;
- my $prefix = defined $cd ? $cd : './';
- if (-d $prefix && -f "$prefix/inbox.lock") { # v2
- $$ver = 2 if $ver;
- return abs_path($prefix);
+ my $try = $cd // '.';
+ my $root_dev_ino;
+ while (1) { # favor v2, first
+ if (-f "$try/inbox.lock") {
+ $$ver = 2 if $ver;
+ return rel2abs_collapsed($try);
+ } elsif (-d $try) {
+ my @try = stat _;
+ $root_dev_ino //= do {
+ my @root = stat('/') or die "stat /: $!\n";
+ "$root[0]\0$root[1]";
+ };
+ last if "$try[0]\0$try[1]" eq $root_dev_ino;
+ $try .= '/..'; # continue, cd up
+ } else {
+ die "`$try' is not a directory\n";
+ }
}
+ # try v1 bare git dirs
my $cmd = [ qw(git rev-parse --git-dir) ];
my $fh = popen_rd($cmd, undef, {-C => $cd});
my $dir = do { local $/; <$fh> };
- close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n";
+ close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n";
chomp $dir;
$$ver = 1 if $ver;
- return abs_path($cd) if ($dir eq '.' && defined $cd);
- abs_path($dir);
+ rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir);
}
# for unconfigured inboxes
sub detect_indexlevel ($) {
my ($ibx) = @_;
- # brand new or never before indexed inboxes default to full
- return 'full' unless $ibx->over;
- delete $ibx->{over}; # don't leave open FD lying around
+ my $over = $ibx->over;
+ my $srch = $ibx->search;
+ delete @$ibx{qw(over search)}; # don't leave open FDs lying around
+ # brand new or never before indexed inboxes default to full
+ return 'full' unless $over;
my $l = 'basic';
- my $srch = $ibx->search or return $l;
- delete $ibx->{search}; # don't leave open FD lying around
+ return $l unless $srch;
if (my $xdb = $srch->xdb) {
$l = 'full';
my $m = $xdb->get_metadata('indexlevel');
$ibx->{inboxdir} has unexpected indexlevel in Xapian: $m
}
+ $ibx->{-skip_docdata} = 1 if $xdb->get_metadata('skip_docdata');
}
$l;
}
name => $name,
address => [ "$name\@example.com" ],
inboxdir => $dir,
- # TODO: consumers may want to warn on this:
- #-unconfigured => 1,
+ # consumers (-convert) warn on this:
+ -unconfigured => 1,
});
}
$cfg or die "--all specified, but $cfgfile not readable\n";
@$argv and die "--all specified, but directories specified\n";
}
-
+ my (@old, @ibxs, @eidx);
+ if ($opt->{-eidx_ok}) {
+ require PublicInbox::ExtSearchIdx;
+ my $i = -1;
+ @$argv = grep {
+ $i++;
+ if (defined(my $ei = resolve_eidxdir($_))) {
+ $ei = PublicInbox::ExtSearchIdx->new($ei, $opt);
+ push @eidx, $ei;
+ undef;
+ } else {
+ 1;
+ }
+ } @$argv;
+ }
my $min_ver = $opt->{-min_inbox_version} || 0;
- my (@old, @ibxs);
- my %dir2ibx;
- if ($cfg) {
+ # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames,
+ # pathnames are not unique due to symlinks and bind mounts
+ if ($opt->{all}) {
$cfg->each_inbox(sub {
my ($ibx) = @_;
- my $path = abs_path($ibx->{inboxdir});
- if (defined($path)) {
- $dir2ibx{$path} = $ibx;
+ if (-e $ibx->{inboxdir}) {
+ push(@ibxs, $ibx) if $ibx->version >= $min_ver;
} else {
- warn <<EOF;
-W: $ibx->{name} $ibx->{inboxdir}: $!
-EOF
+ warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
}
});
- }
- if ($opt->{all}) {
- my @all = values %dir2ibx;
- @all = grep { $_->version >= $min_ver } @all;
- push @ibxs, @all;
} else { # directories specified on the command-line
- my $i = 0;
my @dirs = @$argv;
- push @dirs, '.' unless @dirs;
- foreach (@dirs) {
- my $v;
- my $dir = resolve_repo_dir($_, \$v);
- if ($v < $min_ver) {
+ push @dirs, '.' if !@dirs && $opt->{-use_cwd};
+ my %s2i; # "st_dev\0st_ino" => array index
+ for (my $i = 0; $i <= $#dirs; $i++) {
+ my $dir = $dirs[$i];
+ my @st = stat($dir) or die "stat($dir): $!\n";
+ $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver));
+ if ($ver >= $min_ver) {
+ $s2i{"$st[0]\0$st[1]"} //= $i;
+ } else {
push @old, $dir;
- next;
}
- my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i);
- $i++;
- push @ibxs, $ibx;
}
+ my $done = \'done';
+ eval {
+ $cfg->each_inbox(sub {
+ my ($ibx) = @_;
+ return if $ibx->version < $min_ver;
+ my $dir = $ibx->{inboxdir};
+ if (my @s = stat $dir) {
+ my $i = delete($s2i{"$s[0]\0$s[1]"})
+ // return;
+ $ibxs[$i] = $ibx;
+ die $done if !keys(%s2i);
+ } else {
+ warn "W: $ibx->{name} $dir: $!\n";
+ }
+ });
+ };
+ die $@ if $@ && $@ ne $done;
+ for my $i (sort { $a <=> $b } values %s2i) {
+ $ibxs[$i] = unconfigured_ibx($dirs[$i], $i);
+ }
+ @ibxs = grep { defined } @ibxs; # duplicates are undef
}
if (@old) {
- die "inboxes $min_ver inboxes not supported by $0\n\t",
+ die "-V$min_ver inboxes not supported by $0\n\t",
join("\n\t", @old), "\n";
}
- @ibxs;
+ $opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
}
-# TODO: make Devel::Peek optional, only used for daemon
-my @base_mod = qw(Devel::Peek);
+my @base_mod = ();
my @over_mod = qw(DBD::SQLite DBI);
my %mod_groups = (
-index => [ @base_mod, @over_mod ],
die missing_mod_msg($err) ." required for indexlevel=$indexlevel\n";
}
+sub index_terminate {
+ my (undef, $ibx) = @_; # $_[0] = signal name
+ $ibx->git->cleanup;
+}
+
sub index_inbox {
my ($ibx, $im, $opt) = @_;
+ require PublicInbox::InboxWritable;
my $jobs = delete $opt->{jobs} if $opt;
- if (ref($ibx) && $ibx->version == 2) {
+ if (my $pr = $opt->{-progress}) {
+ $pr->("indexing $ibx->{inboxdir} ...\n");
+ }
+ local @SIG{keys %SIG} = values %SIG;
+ setup_signals(\&index_terminate, $ibx);
+ my $idx = { current_info => $ibx->{inboxdir} };
+ local $SIG{__WARN__} = sub {
+ return if PublicInbox::Eml::warn_ignore(@_);
+ warn($idx->{current_info}, ': ', @_);
+ };
+ if ($ibx->version == 2) {
eval { require PublicInbox::V2Writable };
die "v2 requirements not met: $@\n" if $@;
- my $v2w = $im // eval { $ibx->importer(0) } || eval {
- PublicInbox::V2Writable->new($ibx, {nproc=>$jobs});
- };
+ $ibx->{-creat_opt}->{nproc} = $jobs;
+ my $v2w = $im // $ibx->importer($opt->{reindex} // $jobs);
if (defined $jobs) {
if ($jobs == 0) {
$v2w->{parallel} = 0;
} else {
my $n = $v2w->{shards};
- if ($jobs != ($n + 1) && !$opt->{reshard}) {
- warn
-"Unable to respect --jobs=$jobs, inbox was created with $n shards\n";
+ if ($jobs < ($n + 1) && !$opt->{reshard}) {
+ warn <<EOM;
+Unable to respect --jobs=$jobs on index, inbox was created with $n shards
+EOM
}
}
}
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
- local $SIG{__WARN__} = sub {
- $warn_cb->($v2w->{current_info}, ': ', @_);
- };
- $v2w->index_sync($opt);
+ $idx = $v2w;
} else {
require PublicInbox::SearchIdx;
- my $s = PublicInbox::SearchIdx->new($ibx, 1);
- $s->index_sync($opt);
+ $idx = PublicInbox::SearchIdx->new($ibx, 1);
}
+ $idx->index_sync($opt);
+ $idx->{nidx} // 0; # returns number processed
}
-sub progress_prepare ($) {
- my ($opt) = @_;
+sub progress_prepare ($;$) {
+ my ($opt, $dst) = @_;
# public-inbox-index defaults to quiet, -xcpdb and -compact do not
if (defined($opt->{quiet}) && $opt->{quiet} < 0) {
$opt->{1} = $null; # suitable for spawn() redirect
} else {
$opt->{verbose} ||= 1;
- $opt->{-progress} = sub { print STDERR @_ };
+ $dst //= *STDERR{GLOB};
+ $opt->{-progress} = sub { print $dst '# ', @_ };
}
}
# same unit factors as git:
sub parse_unsigned ($) {
- my ($max_size) = @_;
+ my ($val) = @_;
- $$max_size =~ /\A([0-9]+)([kmg])?\z/i or return;
+ $$val =~ /\A([0-9]+)([kmg])?\z/i or return;
my ($n, $unit_factor) = ($1, $2 // '');
my %u = ( k => 1024, m => 1024**2, g => 1024**3 );
- $$max_size = $n * ($u{lc($unit_factor)} // 1);
+ $$val = $n * ($u{lc($unit_factor)} // 1);
1;
}
+sub index_prepare ($$) {
+ my ($opt, $cfg) = @_;
+ my $env;
+ if ($opt->{compact}) {
+ require PublicInbox::Xapcmd;
+ PublicInbox::Xapcmd::check_compact();
+ $opt->{compact_opt} = { -coarse_lock => 1, compact => 1 };
+ if (defined(my $jobs = $opt->{jobs})) {
+ $opt->{compact_opt}->{jobs} = $jobs;
+ }
+ }
+ for my $k (qw(max_size batch_size)) {
+ my $git_key = "publicInbox.index".ucfirst($k);
+ $git_key =~ s/_([a-z])/\U$1/g;
+ defined(my $v = $opt->{$k} // $cfg->{lc($git_key)}) or next;
+ parse_unsigned(\$v) or die "`$git_key=$v' not parsed\n";
+ $v > 0 or die "`$git_key=$v' must be positive\n";
+ $opt->{$k} = $v;
+ }
+
+ # out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit
+ # https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html
+ $opt->{batch_size} and
+ $env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' };
+
+ for my $k (qw(sequential-shard)) {
+ my $git_key = "publicInbox.index".ucfirst($k);
+ $git_key =~ s/-([a-z])/\U$1/g;
+ defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next;
+ defined(my $v = $cfg->git_bool($s))
+ or die "`$git_key=$s' not boolean\n";
+ $opt->{$k} = $v;
+ }
+ for my $k (qw(since until)) {
+ my $v = $opt->{$k} // next;
+ $opt->{reindex} or die "--$k=$v requires --reindex\n";
+ }
+ $env;
+}
+
+sub do_chdir ($) {
+ my $chdir = $_[0] // return;
+ for my $d (@$chdir) {
+ next if $d eq ''; # same as git(1)
+ chdir $d or die "cd $d: $!";
+ }
+}
+
1;