#!/usr/bin/perl -w
# Copyright (C) 2015-2018 all contributors
# License: AGPL-3.0+
# Basic tool to create a Xapian search index for a git repository
# configured for public-inbox.
# Usage with libeatmydata
# highly recommended: eatmydata public-inbox-index REPO_DIR
use strict;
use warnings;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
use Cwd 'abs_path';
my $usage = "public-inbox-index REPO_DIR";
use PublicInbox::Config;

# Load the public-inbox configuration.  When it cannot be loaded, warn and
# carry on with an empty hash so that indexing by path is still possible.
my $config = eval { PublicInbox::Config->new };
$config ||= eval {
    warn "public-inbox unconfigured for serving, indexing anyways...\n";
    +{};
};

# The indexer is loaded at run time so that a failed load ends in a short
# message and exit status 1 rather than a compile-time abort.
eval { require PublicInbox::SearchIdx };
if ($@) {
    print STDERR "Search::Xapian required for $0\n";
    exit 1;
}
# Command-line switches:
#   --reindex      stored in $reindex
#   --regenerate   stored in $regen
#   --jobs|-j N    stored in $jobs; stays undefined unless given
my ($reindex, $regen, $jobs);
my %opts = (
    '--reindex'    => \$reindex,
    '--regenerate' => \$regen,
    '--jobs|j=i'   => \$jobs,
);
unless (GetOptions(%opts)) {
    die "bad command-line args\n$usage";
}
if (defined($jobs) && $jobs <= 0) {
    die "--jobs must be positive\n";
}

# Inboxes to index; filled in once the arguments have been resolved.
my @dirs;
# Resolve an inbox location to an absolute path.
#
# $cd - directory to inspect; when undefined the current directory is used.
#
# A directory containing "inbox.lock" is treated as a v2 inbox and its own
# absolute path is returned.  Otherwise "git rev-parse --git-dir" is run
# (after chdir-ing to $cd in the child when $cd is given) and the absolute
# path of the directory it prints is returned.
#
# Dies when forking fails, when the child cannot chdir or exec, or when the
# git command does not finish successfully.
sub resolve_repo_dir {
    my ($cd) = @_;
    my $prefix = defined $cd ? $cd : './';
    if (-d $prefix && -f "$prefix/inbox.lock") { # v2
        return abs_path($prefix);
    }
    my @cmd = qw(git rev-parse --git-dir);
    my $cmd = join(' ', @cmd);

    # implicit fork: the child's STDOUT becomes readable through $fh
    my $pid = open my $fh, '-|';
    defined $pid or die "forking $cmd failed: $!\n";
    if ($pid == 0) {
        if (defined $cd) {
            chdir $cd or die "chdir $cd failed: $!\n";
        }
        exec @cmd;
        die "Failed to exec $cmd: $!\n";
    }

    my $dir = do { local $/; <$fh> };
    unless (close $fh) {
        # close() on a pipe leaves $! at 0 when the only problem was a
        # non-zero exit of the child; the wait status is then in $?
        die "error in $cmd: ", ($! || "wait status $?"), "\n";
    }
    chomp $dir;
    if (defined $cd) {
        return abs_path($cd) if $dir eq '.';

        # A relative answer (e.g. ".git") is relative to the directory the
        # command ran in, which is $cd and not our own working directory.
        $dir = "$cd/$dir" unless $dir =~ m!\A/!;
    }
    return abs_path($dir);
}
# Resolve every REPO_DIR named on the command line; with no arguments,
# resolve a single inbox starting from the current directory.
@dirs = @ARGV
    ? (map { resolve_repo_dir($_) } @ARGV)
    : (resolve_repo_dir());

# Print the one-line usage summary to STDERR and exit with status 1.
sub usage {
    print STDERR "Usage: $usage\n";
    exit 1;
}

@dirs or usage();
# For every "publicinbox.<name>.mainrepo" entry in the configuration, swap
# each resolved path that equals the configured value for the inbox object
# returned by lookup_name(); paths without a match are left as strings.
foreach my $key (keys %$config) {
    my ($name) = $key =~ /\Apublicinbox\.([^\.]+)\.mainrepo\z/ or next;
    my $mainrepo = $config->{$key};
    foreach my $slot (@dirs) { # $slot aliases the @dirs element
        next unless $slot eq $mainrepo;
        my $ibx = $config->lookup_name($name) or next;
        $slot = $ibx;
    }
}
# Index each target in turn.  A plain path that contains "inbox.lock" (v2)
# is first wrapped in an inbox object named 'unnamed'.
for my $target (@dirs) {
    my $is_v2_path = !ref($target) && -f "$target/inbox.lock";
    if ($is_v2_path) {
        $target = PublicInbox::Inbox->new({
            mainrepo => $target,
            name => 'unnamed',
        });
    }
    index_dir($target);
}
# Index one inbox.
#
# $repo - either a plain path (must be an existing directory) or an inbox
#         object.  An object whose {version} is 2 is indexed through
#         PublicInbox::V2Writable; everything else goes through
#         PublicInbox::SearchIdx.
#
# Reads the file-level option variables $jobs, $reindex and $regen.
#
# Dies when a plain path is not a directory, when PublicInbox::V2Writable
# cannot be loaded or constructed, or when a v2 inbox already has a msgmap
# maximum and neither --reindex nor --regenerate was requested.
sub index_dir {
    my ($repo) = @_;
    if (!ref $repo && ! -d $repo) {
        die "$repo does not appear to be an inbox repository\n";
    }
    if (ref($repo) && ($repo->{version} || 1) == 2) {
        eval { require PublicInbox::V2Writable };
        die "v2 requirements not met: $@\n" if $@;

        my $v2w;
        {
            # Override NPROC only for the constructor call and only when
            # --jobs was given, so a value already in the environment is
            # not overwritten.  A bare block (instead of eval) lets any
            # constructor failure propagate instead of being swallowed.
            local $ENV{NPROC} = $jobs if defined $jobs;
            $v2w = PublicInbox::V2Writable->new($repo);
        }
        if (defined $jobs) {
            if ($jobs == 1) {
                $v2w->{parallel} = 0;
            } else {
                my $n = $v2w->{partitions};
                if ($jobs != $n) {
                    warn
"Unable to respect --jobs=$jobs, inbox was created with $n partitions\n";
                }
            }
        }
        my $mm = $repo->mm;

        # declared separately: "my ... if COND" has undefined behaviour
        my $max;
        (undef, $max) = $mm->minmax if $mm;
        if (defined($max) && !$reindex && !$regen) {
            die
"v2 inboxes may only use --reindex and/or --regenerate once\n".
"msgmap.sqlite3 is initialized\n";
        }
        $v2w->reindex($regen);
        $v2w->done;
    } else {
        my $s = PublicInbox::SearchIdx->new($repo, 1);
        $s->index_sync({ reindex => $reindex });
    }
}