X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FAdmin.pm;h=11ea8f8307c2082ec974d51763df3f5f2983fa8c;hb=23af251dd607c4e75ab1e68063f2c885c48cc035;hp=34aa3129f5228de6166f7b5d84261cdea0631e8a;hpb=0c6d38221ec9fbf9d7c7e0329252b10f17ab7a27;p=public-inbox.git diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 34aa3129..11ea8f83 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -1,82 +1,204 @@ -# Copyright (C) 2019 all contributors +# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # common stuff for administrative command-line tools # Unstable internal API package PublicInbox::Admin; use strict; -use warnings; -use Cwd 'abs_path'; -use base qw(Exporter); -our @EXPORT_OK = qw(resolve_repo_dir); +use parent qw(Exporter); +our @EXPORT_OK = qw(setup_signals); +use PublicInbox::Config; +use PublicInbox::Inbox; +use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Eml; +*rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed; -sub resolve_repo_dir { +sub setup_signals { + my ($cb, $arg) = @_; # optional + require POSIX; + + # we call exit() here instead of _exit() so DESTROY methods + # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap) + $SIG{INT} = $SIG{HUP} = $SIG{PIPE} = $SIG{TERM} = sub { + my ($sig) = @_; + # https://www.tldp.org/LDP/abs/html/exitcodes.html + eval { $cb->($sig, $arg) } if $cb; + $sig = 'SIG'.$sig; + exit(128 + POSIX->$sig); + }; +} + +sub resolve_eidxdir { + my ($cd) = @_; + my $try = $cd // '.'; + my $root_dev_ino; + while (1) { # favor v2, first + if (-f "$try/ei.lock") { + return rel2abs_collapsed($try); + } elsif (-d $try) { + my @try = stat _; + $root_dev_ino //= do { + my @root = stat('/') or die "stat /: $!\n"; + "$root[0]\0$root[1]"; + }; + return undef if "$try[0]\0$try[1]" eq $root_dev_ino; + $try .= '/..'; # continue, cd up + } else { + die "`$try' is not a directory\n"; + } + } +} + +sub resolve_inboxdir { my ($cd, $ver) = @_; - my $prefix = defined $cd ? $cd : './'; - if (-d $prefix && -f "$prefix/inbox.lock") { # v2 - $$ver = 2 if $ver; - return abs_path($prefix); - } - - my @cmd = qw(git rev-parse --git-dir); - my $cmd = join(' ', @cmd); - my $pid = open my $fh, '-|'; - defined $pid or die "forking $cmd failed: $!\n"; - if ($pid == 0) { - if (defined $cd) { - chdir $cd or die "chdir $cd failed: $!\n"; + my $try = $cd // '.'; + my $root_dev_ino; + while (1) { # favor v2, first + if (-f "$try/inbox.lock") { + $$ver = 2 if $ver; + return rel2abs_collapsed($try); + } elsif (-d $try) { + my @try = stat _; + $root_dev_ino //= do { + my @root = stat('/') or die "stat /: $!\n"; + "$root[0]\0$root[1]"; + }; + last if "$try[0]\0$try[1]" eq $root_dev_ino; + $try .= '/..'; # continue, cd up + } else { + die "`$try' is not a directory\n"; } - exec @cmd; - die "Failed to exec $cmd: $!\n"; - } else { - my $dir = eval { - local $/; - <$fh>; - }; - close $fh or die "error in $cmd: $!\n"; - chomp $dir; - $$ver = 1 if $ver; - return abs_path($cd) if ($dir eq '.' && defined $cd); - abs_path($dir); } + # try v1 bare git dirs + my $cmd = [ qw(git rev-parse --git-dir) ]; + my $fh = popen_rd($cmd, undef, {-C => $cd}); + my $dir = do { local $/; <$fh> }; + close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n"; + chomp $dir; + $$ver = 1 if $ver; + rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir); } -sub resolve_inboxes { - my ($argv, $warn_on_unconfigured) = @_; - require PublicInbox::Config; - require PublicInbox::Inbox; +# for unconfigured inboxes +sub detect_indexlevel ($) { + my ($ibx) = @_; + + my $over = $ibx->over; + my $srch = $ibx->search; + delete @$ibx{qw(over search)}; # don't leave open FDs lying around - my @ibxs = map { resolve_repo_dir($_) } @$argv; - push(@ibxs, resolve_repo_dir()) unless @ibxs; + # brand new or never before indexed inboxes default to full + return 'full' unless $over; + my $l = 'basic'; + return $l unless $srch; + if (my $xdb = $srch->xdb) { + $l = 'full'; + my $m = $xdb->get_metadata('indexlevel'); + if ($m eq 'medium') { + $l = $m; + } elsif ($m ne '') { + warn <<""; +$ibx->{inboxdir} has unexpected indexlevel in Xapian: $m - my %dir2ibx; - if (my $config = eval { PublicInbox::Config->new }) { - $config->each_inbox(sub { + } + $ibx->{-skip_docdata} = 1 if $xdb->get_metadata('skip_docdata'); + } + $l; +} + +sub unconfigured_ibx ($$) { + my ($dir, $i) = @_; + my $name = "unconfigured-$i"; + PublicInbox::Inbox->new({ + name => $name, + address => [ "$name\@example.com" ], + inboxdir => $dir, + # consumers (-convert) warn on this: + -unconfigured => 1, + }); +} + +sub resolve_inboxes ($;$$) { + my ($argv, $opt, $cfg) = @_; + $opt ||= {}; + + $cfg //= PublicInbox::Config->new; + if ($opt->{all}) { + my $cfgfile = PublicInbox::Config::default_file(); + $cfg or die "--all specified, but $cfgfile not readable\n"; + @$argv and die "--all specified, but directories specified\n"; + } + my (@old, @ibxs, @eidx); + if ($opt->{-eidx_ok}) { + require PublicInbox::ExtSearchIdx; + my $i = -1; + @$argv = grep { + $i++; + if (defined(my $ei = resolve_eidxdir($_))) { + $ei = PublicInbox::ExtSearchIdx->new($ei, $opt); + push @eidx, $ei; + undef; + } else { + 1; + } + } @$argv; + } + my $min_ver = $opt->{-min_inbox_version} || 0; + # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames, + # pathnames are not unique due to symlinks and bind mounts + if ($opt->{all}) { + $cfg->each_inbox(sub { my ($ibx) = @_; - $dir2ibx{abs_path($ibx->{mainrepo})} = $ibx; + if (-e $ibx->{inboxdir}) { + push(@ibxs, $ibx) if $ibx->version >= $min_ver; + } else { + warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n"; + } }); - } elsif ($warn_on_unconfigured) { - # do we really care about this? It's annoying... - warn $warn_on_unconfigured, "\n"; - } - for my $i (0..$#ibxs) { - my $dir = $ibxs[$i]; - $ibxs[$i] = $dir2ibx{$dir} ||= do { - my $name = "unconfigured-$i"; - PublicInbox::Inbox->new({ - name => $name, - address => [ "$name\@example.com" ], - mainrepo => $dir, - # TODO: consumers may want to warn on this: - #-unconfigured => 1, + } else { # directories specified on the command-line + my @dirs = @$argv; + push @dirs, '.' if !@dirs && $opt->{-use_cwd}; + my %s2i; # "st_dev\0st_ino" => array index + for (my $i = 0; $i <= $#dirs; $i++) { + my $dir = $dirs[$i]; + my @st = stat($dir) or die "stat($dir): $!\n"; + $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver)); + if ($ver >= $min_ver) { + $s2i{"$st[0]\0$st[1]"} //= $i; + } else { + push @old, $dir; + } + } + my $done = \'done'; + eval { + $cfg->each_inbox(sub { + my ($ibx) = @_; + return if $ibx->version < $min_ver; + my $dir = $ibx->{inboxdir}; + if (my @s = stat $dir) { + my $i = delete($s2i{"$s[0]\0$s[1]"}) + // return; + $ibxs[$i] = $ibx; + die $done if !keys(%s2i); + } else { + warn "W: $ibx->{name} $dir: $!\n"; + } }); }; + die $@ if $@ && $@ ne $done; + for my $i (sort { $a <=> $b } values %s2i) { + $ibxs[$i] = unconfigured_ibx($dirs[$i], $i); + } + @ibxs = grep { defined } @ibxs; # duplicates are undef + } + if (@old) { + die "-V$min_ver inboxes not supported by $0\n\t", + join("\n\t", @old), "\n"; } - @ibxs; + $opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs; } -# TODO: make Devel::Peek optional, only used for daemon -my @base_mod = qw(Email::MIME Date::Parse Devel::Peek); +my @base_mod = (); my @over_mod = qw(DBD::SQLite DBI); my %mod_groups = ( -index => [ @base_mod, @over_mod ], @@ -99,6 +221,10 @@ sub check_require { while (my $mod = shift @mods) { if (my $groups = $mod_groups{$mod}) { push @mods, @$groups; + } elsif ($mod eq 'Search::Xapian') { + require PublicInbox::Search; + PublicInbox::Search::load_xapian() or + $err->{'Search::Xapian || Xapian'} = $@; } else { eval "require $mod"; $err->{$mod} = $@ if $@; @@ -135,35 +261,125 @@ invalid indexlevel=$indexlevel (must be `basic', `medium', or `full') die missing_mod_msg($err) ." required for indexlevel=$indexlevel\n"; } +sub index_terminate { + my (undef, $ibx) = @_; # $_[0] = signal name + $ibx->git->cleanup; +} + sub index_inbox { - my ($ibx, $opt) = @_; + my ($ibx, $im, $opt) = @_; + require PublicInbox::InboxWritable; my $jobs = delete $opt->{jobs} if $opt; - if (ref($ibx) && ($ibx->{version} || 1) == 2) { + if (my $pr = $opt->{-progress}) { + $pr->("indexing $ibx->{inboxdir} ...\n"); + } + local @SIG{keys %SIG} = values %SIG; + setup_signals(\&index_terminate, $ibx); + my $idx = { current_info => $ibx->{inboxdir} }; + local $SIG{__WARN__} = sub { + return if PublicInbox::Eml::warn_ignore(@_); + warn($idx->{current_info}, ': ', @_); + }; + if ($ibx->version == 2) { eval { require PublicInbox::V2Writable }; die "v2 requirements not met: $@\n" if $@; - my $v2w = eval { $ibx->importer(0) } || eval { - PublicInbox::V2Writable->new($ibx, {nproc=>$jobs}); - }; + $ibx->{-creat_opt}->{nproc} = $jobs; + my $v2w = $im // $ibx->importer($opt->{reindex} // $jobs); if (defined $jobs) { if ($jobs == 0) { $v2w->{parallel} = 0; } else { - my $n = $v2w->{partitions}; - if ($jobs != ($n + 1)) { - warn -"Unable to respect --jobs=$jobs, inbox was created with $n partitions\n"; + my $n = $v2w->{shards}; + if ($jobs < ($n + 1) && !$opt->{reshard}) { + warn <($v2w->{current_info}, ': ', @_); - }; - $v2w->index_sync($opt); + $idx = $v2w; } else { require PublicInbox::SearchIdx; - my $s = PublicInbox::SearchIdx->new($ibx, 1); - $s->index_sync($opt); + $idx = PublicInbox::SearchIdx->new($ibx, 1); + } + $idx->index_sync($opt); + $idx->{nidx} // 0; # returns number processed +} + +sub progress_prepare ($;$) { + my ($opt, $dst) = @_; + + # public-inbox-index defaults to quiet, -xcpdb and -compact do not + if (defined($opt->{quiet}) && $opt->{quiet} < 0) { + $opt->{quiet} = !$opt->{verbose}; + } + if ($opt->{quiet}) { + open my $null, '>', '/dev/null' or + die "failed to open /dev/null: $!\n"; + $opt->{1} = $null; # suitable for spawn() redirect + } else { + $opt->{verbose} ||= 1; + $dst //= *STDERR{GLOB}; + $opt->{-progress} = sub { print $dst '# ', @_ }; + } +} + +# same unit factors as git: +sub parse_unsigned ($) { + my ($val) = @_; + + $$val =~ /\A([0-9]+)([kmg])?\z/i or return; + my ($n, $unit_factor) = ($1, $2 // ''); + my %u = ( k => 1024, m => 1024**2, g => 1024**3 ); + $$val = $n * ($u{lc($unit_factor)} // 1); + 1; +} + +sub index_prepare ($$) { + my ($opt, $cfg) = @_; + my $env; + if ($opt->{compact}) { + require PublicInbox::Xapcmd; + PublicInbox::Xapcmd::check_compact(); + $opt->{compact_opt} = { -coarse_lock => 1, compact => 1 }; + if (defined(my $jobs = $opt->{jobs})) { + $opt->{compact_opt}->{jobs} = $jobs; + } + } + for my $k (qw(max_size batch_size)) { + my $git_key = "publicInbox.index".ucfirst($k); + $git_key =~ s/_([a-z])/\U$1/g; + defined(my $v = $opt->{$k} // $cfg->{lc($git_key)}) or next; + parse_unsigned(\$v) or die "`$git_key=$v' not parsed\n"; + $v > 0 or die "`$git_key=$v' must be positive\n"; + $opt->{$k} = $v; + } + + # out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit + # https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html + $opt->{batch_size} and + $env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' }; + + for my $k (qw(sequential-shard)) { + my $git_key = "publicInbox.index".ucfirst($k); + $git_key =~ s/-([a-z])/\U$1/g; + defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next; + defined(my $v = $cfg->git_bool($s)) + or die "`$git_key=$s' not boolean\n"; + $opt->{$k} = $v; + } + for my $k (qw(since until)) { + my $v = $opt->{$k} // next; + $opt->{reindex} or die "--$k=$v requires --reindex\n"; + } + $env; +} + +sub do_chdir ($) { + my $chdir = $_[0] // return; + for my $d (@$chdir) { + next if $d eq ''; # same as git(1) + chdir $d or die "cd $d: $!"; } }