#!/usr/bin/perl -w
-# Copyright (C) 2015 all contributors <meta@public-inbox.org>
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Copyright (C) 2015-2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Basic tool to create a Xapian search index for a git repository
# configured for public-inbox.
# Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/>
-# highly recommended: eatmydata public-inbox-index GIT_DIR
+# highly recommended: eatmydata public-inbox-index REPO_DIR
use strict;
use warnings;
-my $usage = "public-inbox-index GIT_DIR";
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use Cwd 'abs_path';
+my $usage = "public-inbox-index REPO_DIR";
use PublicInbox::Config;
+# Load the public-inbox configuration.  If PublicInbox::Config->new dies or
+# returns a false value, warn and carry on with an empty, unblessed hash ref
+# so that indexing can still proceed.
+my $config = eval { PublicInbox::Config->new } || eval {
+ warn "public-inbox unconfigured for serving, indexing anyways...\n";
+ {}
+};
+# SearchIdx is loaded at run time inside eval so a load failure can be
+# reported on STDERR before exiting with status 1.
+# NOTE(review): the message assumes Search::Xapian is what is missing;
+# the actual error in $@ is not printed.
eval { require PublicInbox::SearchIdx };
if ($@) {
print STDERR "Search::Xapian required for $0\n";
exit 1;
}
+
+# Command-line switches:
+#   --reindex      forwarded to index_sync() as { reindex => ... } for
+#                  non-v2 inboxes
+#   --regenerate   passed as the argument to V2Writable->reindex for v2
+#                  inboxes
+#   --jobs, -j N   integer; exported as NPROC while the v2 writer is built
+# For a v2 inbox that already has message numbers, index_dir() refuses to
+# run unless --reindex or --regenerate is given.
+my $reindex;
+my $regen;
+my $jobs = undef;
+my %opts = (
+ '--reindex' => \$reindex,
+ '--regenerate' => \$regen,
+ '--jobs|j=i' => \$jobs,
+);
+GetOptions(%opts) or die "bad command-line args\n$usage";
+# $jobs stays undef when --jobs is not given; zero and negative values are
+# rejected here.
+die "--jobs must be positive\n" if defined $jobs && $jobs <= 0;
+
my @dirs;
-sub resolve_git_dir {
+sub resolve_repo_dir {
my ($cd) = @_;
+ my $prefix = defined $cd ? $cd : './';
+ if (-d $prefix && -f "$prefix/inbox.lock") { # v2
+ return abs_path($prefix);
+ }
+
my @cmd = qw(git rev-parse --git-dir);
my $cmd = join(' ', @cmd);
my $pid = open my $fh, '-|';
};
close $fh or die "error in $cmd: $!\n";
chomp $dir;
- return $cd if ($dir eq '.' && defined $cd);
- $dir;
+ return abs_path($cd) if ($dir eq '.' && defined $cd);
+ abs_path($dir);
}
}
+# Resolve every command-line argument to a repository directory; with no
+# arguments, resolve_repo_dir() is called once without an argument.
if (@ARGV) {
- @dirs = map { resolve_git_dir($_) } @ARGV;
+ @dirs = map { resolve_repo_dir($_) } @ARGV;
} else {
- @dirs = (resolve_git_dir());
+ @dirs = (resolve_repo_dir());
}
+# Print the usage line to STDERR and exit with status 1.
sub usage { print STDERR "Usage: $usage\n"; exit 1 }
usage() unless @dirs;
+# For every config key of the form publicinbox.<name>.mainrepo, replace each
+# entry of @dirs that is string-equal to that key's value with the result of
+# $config->lookup_name(<name>), provided the lookup returns a true value.
+foreach my $k (keys %$config) {
+ $k =~ /\Apublicinbox\.([^\.]+)\.mainrepo\z/ or next;
+ my $name = $1;
+ my $v = $config->{$k};
+ for my $i (0..$#dirs) {
+ next if $dirs[$i] ne $v;
+ my $ibx = $config->lookup_name($name);
+ $dirs[$i] = $ibx if $ibx;
+ }
+}
+
+# Entries that are still plain paths and contain an inbox.lock file are
+# treated as v2 and wrapped in a PublicInbox::Inbox named 'unnamed'; every
+# entry is then handed to index_dir().
foreach my $dir (@dirs) {
+ if (!ref($dir) && -f "$dir/inbox.lock") { # v2
+ my $ibx = { mainrepo => $dir, name => 'unnamed' };
+ $dir = PublicInbox::Inbox->new($ibx);
+ }
index_dir($dir);
}
+# index_dir($repo) - index a single inbox.
+#
+# $repo is either a plain directory path or a reference (an inbox object).
+# A reference whose {version} is 2 is indexed through
+# PublicInbox::V2Writable; everything else goes through
+# PublicInbox::SearchIdx->index_sync.
+#
+# Dies when a plain path is not a directory, when V2Writable cannot be
+# loaded or constructed, or when a v2 inbox that already reports a maximum
+# message number is indexed without --reindex or --regenerate.
sub index_dir {
- my ($git_dir) = @_;
- -d $git_dir or die "$git_dir does not appear to be a git repository\n";
+    my ($repo) = @_;
+    if (!ref $repo && ! -d $repo) {
+        die "$repo does not appear to be an inbox repository\n";
+    }
+    if (ref($repo) && ($repo->{version} || 1) == 2) {
+        eval { require PublicInbox::V2Writable };
+        die "v2 requirements not met: $@\n" if $@;
+        # The do-block exists only to scope the NPROC override.  Unlike
+        # eval it lets a constructor failure propagate with its own
+        # message instead of leaving $v2w undefined.
+        my $v2w = do {
+            # Only override NPROC when --jobs was given, so a value
+            # inherited from the environment is otherwise left alone.
+            local $ENV{NPROC} = $jobs if defined $jobs;
+            PublicInbox::V2Writable->new($repo);
+        };
+        if (defined $jobs) {
+            if ($jobs == 1) {
+                $v2w->{parallel} = 0;
+            } else {
+                my $n = $v2w->{partitions};
+                if ($jobs != $n) {
+                    warn
+"Unable to respect --jobs=$jobs, inbox was created with $n partitions\n";
+                }
+            }
+        }
+        my $mm = $repo->mm;
+        # Declared separately from the conditional assignment because
+        # "my ... if COND" has undefined behaviour.
+        my $max;
+        (undef, $max) = $mm->minmax if $mm;
+        if (defined($max) && !$reindex && !$regen) {
+            die
+"v2 inboxes may only use --reindex and/or --regenerate once\n".
+"msgmap.sqlite3 is initialized\n";
+        }
- my $s = PublicInbox::SearchIdx->new($git_dir, 1);
- $s->index_sync;
+        $v2w->reindex($regen);
+        $v2w->done;
+    } else {
+        my $s = PublicInbox::SearchIdx->new($repo, 1);
+        $s->index_sync({ reindex => $reindex });
+    }
}