}
sub html_end {
- print $out <<EOF or die;
- git clone $PublicInbox::WwwStream::CODE_URL
-</pre></body></html>
-EOF
+ for (@$PublicInbox::WwwStream::CODE_URL) {
+ print $out " git clone $_\n" or die;
+ }
+ print $out "</pre></body></html>\n" or die;
}
sub atom_start {
Available in public-inbox 1.6.0+.
+=item --update-extindex=EXTINDEX, -E
+
+Update the given external index (L<public-inbox-extindex-format(5)>.
+Either the configured section name (e.g. C<all>) or a directory name
+may be specified.
+
+Defaults to C<all> if C<[extindex "all"]> is configured,
+otherwise no external indices are updated.
+
+May be specified multiple times in rare cases where multiple
+external indices are configured.
+
+=item --no-update-extindex
+
+Do not update the C<all> external index by default. This negates
+all uses of C<-E> / C<--update-extindex=> on the command-line.
+
=back
=head1 FILES
=head1 SEE ALSO
-L<Search::Xapian>, L<DBD::SQLite>
+L<Search::Xapian>, L<DBD::SQLite>, L<public-inbox-extindex-format(5)>
=head1 NAME
-public-inbox v1 git repository and tree description (aka "ssoma")
+public-inbox-v1-format - git repository and tree description (aka "ssoma")
=head1 DESCRIPTION
=head1 NAME
-public-inbox v2 format description
+public-inbox-v2-format - structure of public inbox v2 archives
=head1 DESCRIPTION
t/xcpdb-reshard.t
xt/cmp-msgstr.t
xt/cmp-msgview.t
+xt/create-many-inboxes.t
xt/eml_check_limits.t
xt/git-http-backend.t
xt/git_async_cmp.t
git clone https://public-inbox.org/public-inbox.git
git clone https://repo.or.cz/public-inbox.git
+ torsocks git clone http://ou63pmih66umazou.onion/public-inbox.git
torsocks git clone http://hjrcffqmbrq6wope.onion/public-inbox
See below for contact info.
package PublicInbox::Admin;
use strict;
use parent qw(Exporter);
-use Cwd qw(abs_path);
-use POSIX ();
our @EXPORT_OK = qw(setup_signals);
use PublicInbox::Config;
use PublicInbox::Inbox;
use PublicInbox::Spawn qw(popen_rd);
+*rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed;
sub setup_signals {
my ($cb, $arg) = @_; # optional
+ require POSIX;
# we call exit() here instead of _exit() so DESTROY methods
# get called (e.g. File::Temp::Dir and PublicInbox::Msgmap)
sub resolve_inboxdir {
my ($cd, $ver) = @_;
- my $prefix = defined $cd ? $cd : './';
- if (-d $prefix && -f "$prefix/inbox.lock") { # v2
- $$ver = 2 if $ver;
- return abs_path($prefix);
+ my $try = $cd // '.';
+ my $root_dev_ino;
+ while (1) { # favor v2, first
+ if (-f "$try/inbox.lock") {
+ $$ver = 2 if $ver;
+ return rel2abs_collapsed($try);
+ } elsif (-d $try) {
+ my @try = stat _;
+ $root_dev_ino //= do {
+ my @root = stat('/') or die "stat /: $!\n";
+ "$root[0]\0$root[1]";
+ };
+ last if "$try[0]\0$try[1]" eq $root_dev_ino;
+ $try .= '/..'; # continue, cd up
+ } else {
+ die "`$try' is not a directory\n";
+ }
}
+ # try v1 bare git dirs
my $cmd = [ qw(git rev-parse --git-dir) ];
my $fh = popen_rd($cmd, undef, {-C => $cd});
my $dir = do { local $/; <$fh> };
- close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n";
+ close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n";
chomp $dir;
$$ver = 1 if $ver;
- return abs_path($cd) if ($dir eq '.' && defined $cd);
- abs_path($dir);
+ rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir);
}
# for unconfigured inboxes
name => $name,
address => [ "$name\@example.com" ],
inboxdir => $dir,
- # TODO: consumers may want to warn on this:
- #-unconfigured => 1,
+ # consumers (-convert) warn on this:
+ -unconfigured => 1,
});
}
}
my $min_ver = $opt->{-min_inbox_version} || 0;
+ # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames,
+ # pathnames are not unique due to symlinks and bind mounts
my (@old, @ibxs);
- my %dir2ibx;
- my $all = $opt->{all} ? [] : undef;
- if ($cfg) {
+ if ($opt->{all}) {
$cfg->each_inbox(sub {
my ($ibx) = @_;
- my $path = abs_path($ibx->{inboxdir});
- if (defined($path)) {
- $dir2ibx{$path} = $ibx;
- push @$all, $ibx if $all;
+ if (-e $ibx->{inboxdir}) {
+ push(@ibxs, $ibx) if $ibx->version >= $min_ver;
} else {
- warn <<EOF;
-W: $ibx->{name} $ibx->{inboxdir}: $!
-EOF
+ warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
}
});
- }
- if ($all) {
- @$all = grep { $_->version >= $min_ver } @$all;
- @ibxs = @$all;
} else { # directories specified on the command-line
- my $i = 0;
my @dirs = @$argv;
push @dirs, '.' if !@dirs && $opt->{-use_cwd};
- foreach (@dirs) {
- my $v;
- my $dir = resolve_inboxdir($_, \$v);
- if ($v < $min_ver) {
+ my %s2i; # "st_dev\0st_ino" => array index
+ for (my $i = 0; $i <= $#dirs; $i++) {
+ my $dir = $dirs[$i];
+ my @st = stat($dir) or die "stat($dir): $!\n";
+ $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver));
+ if ($ver >= $min_ver) {
+ $s2i{"$st[0]\0$st[1]"} //= $i;
+ } else {
push @old, $dir;
- next;
}
- my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i);
- $i++;
- push @ibxs, $ibx;
}
+ my $done = \'done';
+ eval {
+ $cfg->each_inbox(sub {
+ my ($ibx) = @_;
+ return if $ibx->version < $min_ver;
+ my $dir = $ibx->{inboxdir};
+ if (my @s = stat $dir) {
+ my $i = delete($s2i{"$s[0]\0$s[1]"})
+ // return;
+ $ibxs[$i] = $ibx;
+ die $done if !keys(%s2i);
+ } else {
+ warn "W: $ibx->{name} $dir: $!\n";
+ }
+ });
+ };
+ die $@ if $@ && $@ ne $done;
+ for my $i (sort { $a <=> $b } values %s2i) {
+ $ibxs[$i] = unconfigured_ibx($dirs[$i], $i);
+ }
+ @ibxs = grep { defined } @ibxs; # duplicates are undef
}
if (@old) {
die "-V$min_ver inboxes not supported by $0\n\t",
}
local %SIG = %SIG;
setup_signals(\&index_terminate, $ibx);
- my $warn_cb = $SIG{__WARN__} // sub { print STDERR @_ };
+ my $warn_cb = $SIG{__WARN__} // \&CORE::warn;
my $idx = { current_info => $ibx->{inboxdir} };
my $warn_ignore = PublicInbox::InboxWritable->can('warn_ignore');
local $SIG{__WARN__} = sub {
$idx = PublicInbox::SearchIdx->new($ibx, 1);
}
$idx->index_sync($opt);
+ $idx->{nidx} // 0; # returns number processed
}
sub progress_prepare ($) {
sub config_fh_parse ($$$) {
my ($fh, $rs, $fs) = @_;
- my %rv;
- my (%section_seen, @section_order);
+ my (%rv, %seen, @section_order, $line, $k, $v, $section, $cur, $i);
local $/ = $rs;
- while (defined(my $line = <$fh>)) {
- chomp $line;
- my ($k, $v) = split($fs, $line, 2);
- my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/);
- unless (defined $section_seen{$section}) {
- $section_seen{$section} = 1;
- push @section_order, $section;
- }
-
- my $cur = $rv{$k};
- if (defined $cur) {
+ while (defined($line = <$fh>)) { # perf critical with giant configs
+ $i = index($line, $fs);
+ $k = substr($line, 0, $i);
+ $v = substr($line, $i + 1, -1); # chop off $fs
+ $section = substr($k, 0, rindex($k, '.'));
+ $seen{$section} //= push(@section_order, $section);
+
+ if (defined($cur = $rv{$k})) {
if (ref($cur) eq "ARRAY") {
push @$cur, $v;
} else {
sub git_config_dump {
my ($file) = @_;
return {} unless -e $file;
- my @cmd = (qw/git config -z -l --includes/, "--file=$file");
- my $cmd = join(' ', @cmd);
- my $fh = popen_rd(\@cmd);
+ my $cmd = [ qw(git config -z -l --includes), "--file=$file" ];
+ my $fh = popen_rd($cmd);
my $rv = config_fh_parse($fh, "\0", "\n");
- close $fh or die "failed to close ($cmd) pipe: $?";
+ close $fh or die "failed to close (@$cmd) pipe: $?";
$rv;
}
}
}
+# abs_path resolves symlinks, so we want to avoid it if rel2abs
+# is sufficient and doesn't leave "/.." or "/../"
+sub rel2abs_collapsed {
+ require File::Spec;
+ my $p = File::Spec->rel2abs($_[-1]);
+ return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0;
+ require Cwd;
+ Cwd::abs_path($p);
+}
+
sub _fill {
my ($self, $pfx) = @_;
my $ibx = {};
}
}
- # backwards compatibility:
- $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"};
- if (($ibx->{inboxdir} // '') =~ /\n/s) {
- warn "E: `$ibx->{inboxdir}' must not contain `\\n'\n";
+ # "mainrepo" is backwards compatibility:
+ my $dir = $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"} // return;
+ if (index($dir, "\n") >= 0) {
+ warn "E: `$dir' must not contain `\\n'\n";
return;
}
foreach my $k (qw(obfuscate)) {
}
}
- return unless defined($ibx->{inboxdir});
- my $name = $pfx;
- $name =~ s/\Apublicinbox\.//;
-
+ my $name = substr($pfx, length('publicinbox.'));
if (!valid_inbox_name($name)) {
warn "invalid inbox name: '$name'\n";
return;
$self->{-by_list_id}->{lc($list_id)} = $ibx;
}
}
- if (my $ngname = $ibx->{newsgroup}) {
+ if (defined(my $ngname = $ibx->{newsgroup})) {
if (ref($ngname)) {
delete $ibx->{newsgroup};
warn 'multiple newsgroups not supported: '.
# wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR.
# Leave out a few chars likely to cause problems or conflicts:
# '|', '<', '>', ';', '#', '$', '&',
- } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]!) {
+ } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! ||
+ $ngname eq '') {
delete $ibx->{newsgroup};
warn "newsgroup name invalid: `$ngname'\n";
} else {
$self->{-by_newsgroup}->{$ngname} = $ibx;
}
}
+ unless (defined $ibx->{newsgroup}) { # for ->eidx_key
+ my $abs = rel2abs_collapsed($dir);
+ if ($abs ne $dir) {
+ warn "W: `$dir' canonicalized to `$abs'\n";
+ $ibx->{inboxdir} = $abs;
+ }
+ }
$self->{-by_name}->{$name} = $ibx;
if ($ibx->{obfuscate}) {
$ibx->{-no_obfuscate} = $self->{-no_obfuscate};
$PostLoopCallback, # subref to call at the end of each loop, if defined (global)
$LoopTimeout, # timeout of event loop in milliseconds
- $DoneInit, # if we've done the one-time module init yet
@Timers, # timers
$in_loop,
);
@Timers = ();
$PostLoopCallback = undef;
- $DoneInit = 0;
$_io = undef; # closes real $Epoll FD
$Epoll = undef; # may call DSKQXS::DESTROY
-
- *EventLoop = *FirstTimeEventLoop;
}
=head2 C<< CLASS->SetLoopTimeout( $timeout ) >>
immediately.
=cut
-sub SetLoopTimeout {
- return $LoopTimeout = $_[1] + 0;
-}
+sub SetLoopTimeout { $LoopTimeout = $_[1] + 0 }
=head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef, $arg) >>
fcntl($_io, F_SETFD, $fl | FD_CLOEXEC);
}
+# caller sets return value to $Epoll
sub _InitPoller
{
- return if $DoneInit;
- $DoneInit = 1;
-
if (PublicInbox::Syscall::epoll_defined()) {
- $Epoll = epoll_create();
- set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0);
+ my $fd = epoll_create();
+ set_cloexec($fd) if (defined($fd) && $fd >= 0);
+ $fd;
} else {
my $cls;
for (qw(DSKQXS DSPoll)) {
last if eval "require $cls";
}
$cls->import(qw(epoll_ctl epoll_wait));
- $Epoll = $cls->new;
+ $cls->new;
}
- *EventLoop = *EpollEventLoop;
}
=head2 C<< CLASS->EventLoop() >>
C<PostLoopCallback> below for how to exit the loop.
=cut
-sub FirstTimeEventLoop {
- my $class = shift;
-
- _InitPoller();
-
- EventLoop($class);
-}
sub now () { clock_gettime(CLOCK_MONOTONIC) }
my $timeout = int(($Timers[0][0] - $now) * 1000) + 1;
# -1 is an infinite timeout, so prefer a real timeout
- return $timeout if $LoopTimeout == -1;
-
- # otherwise pick the lower of our regular timeout and time until
- # the next timer
- return $LoopTimeout if $LoopTimeout < $timeout;
- return $timeout;
+ ($LoopTimeout < 0 || $LoopTimeout >= $timeout) ? $timeout : $LoopTimeout;
}
# We can't use waitpid(-1) safely here since it can hit ``, system(),
$PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1;
}
-sub EpollEventLoop {
+sub EventLoop {
+ $Epoll //= _InitPoller();
local $in_loop = 1;
+ my @events;
do {
- my @events;
- my $i;
my $timeout = RunTimers();
# get up to 1000 events
- my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events);
- for ($i=0; $i<$evcount; $i++) {
+ epoll_wait($Epoll, 1000, $timeout, \@events);
+ for my $fd (@events) {
# it's possible epoll_wait returned many events, including some at the end
# that ones in the front triggered unregister-interest actions. if we
# can't find the %sock entry, it's because we're no longer interested
# in that event.
- $DescriptorMap{$events[$i]->[0]}->event_step;
+ $DescriptorMap{$fd}->event_step;
}
} while (PostEventLoop());
_run_later();
$self->{sock} = $sock;
my $fd = fileno($sock);
- _InitPoller();
-
+ $Epoll //= _InitPoller();
retry:
if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
}
}
# caller only cares for $events[$i]->[0]
- scalar(@$events);
+ $_ = $_->[0] for @$events;
}
# kqueue is close-on-fork (not exec), so we must not close it
my $fd = $pset[$i++];
my $revents = $pset[$i++] or next;
delete($self->{$fd}) if $self->{$fd} & EPOLLONESHOT;
- push @$events, [ $fd ];
+ push @$events, $fd;
}
my $nevents = scalar @$events;
if ($n != $nevents) {
warn "BUG? poll() returned $n, but got $nevents";
}
}
- $n;
}
1;
use POSIX qw(WNOHANG :signal_h);
use Socket qw(IPPROTO_TCP SOL_SOCKET);
sub SO_ACCEPTFILTER () { 0x1000 }
-use Cwd qw/abs_path/;
STDOUT->autoflush(1);
STDERR->autoflush(1);
use PublicInbox::DS qw(now);
sub daemonize () {
if ($daemonize) {
+ require Cwd;
foreach my $i (0..$#ARGV) {
my $arg = $ARGV[$i];
next unless -e $arg;
- $ARGV[$i] = abs_path($arg);
+ $ARGV[$i] = Cwd::abs_path($arg);
}
check_absolute('stdout', $stdout);
check_absolute('stderr', $stderr);
foreach my $fd (3..$end) {
my $s = IO::Handle->new_from_fd($fd, 'r');
if (my $k = sockname($s)) {
- if ($s->blocking) {
- $s->blocking(0);
- warn <<"";
+ my $prev_was_blocking = $s->blocking(0);
+ warn <<"" if $prev_was_blocking;
Inherited socket (fd=$fd) is blocking, making it non-blocking.
Set 'NonBlocking = true' in the systemd.service unit to avoid stalled
processes when multiple service instances start.
- }
$listener_names->{$k} = $s;
push @rv, $s;
} else {
}
sub kill_workers ($) {
- my ($s) = @_;
-
- while (my ($pid, $id) = each %pids) {
- kill $s, $pid;
- }
+ my ($sig) = @_;
+ kill $sig, keys(%pids);
}
sub upgrade_aborted ($) {
header_set($self, $name, @vals);
}
-sub mhdr_decode ($) { eval { $MIME_Header->decode($_[0]) } // $_[0] }
+sub mhdr_decode ($) {
+ eval { $MIME_Header->decode($_[0], Encode::FB_DEFAULT) } // $_[0];
+}
sub filename {
my $dis = header_raw($_[0], 'Content-Disposition');
my ($ibx, $mid) = @_;
return if length($mid) < $MIN_PARTIAL_LEN;
my $srch = $ibx->search or return; # NOT ->isrch, we already try ->ALL
- my $opt = { limit => PARTIAL_MAX, mset => 2 };
+ my $opt = { limit => PARTIAL_MAX, relevance => -1 };
my @try = ("m:$mid*");
my $chop = $mid;
if ($chop =~ s/(\W+)(\w*)\z//) {
use v5.10.1;
use PublicInbox::Over;
use PublicInbox::Inbox;
-use File::Spec ();
use PublicInbox::MiscSearch;
use DBI qw(:sql_types); # SQL_BLOB
sub new {
my ($class, $topdir) = @_;
- $topdir = File::Spec->canonpath($topdir);
bless {
topdir => $topdir,
# xpfx => 'ei15'
$self->{misc} //= PublicInbox::MiscSearch->new("$self->{xpfx}/misc");
}
-sub search { $_[0] } # self
-
# same as per-inbox ->over, for now...
sub over {
my ($self) = @_;
*recent = \&PublicInbox::Inbox::recent;
*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
-*isrch = *search;
+*isrch = *search = \&PublicInbox::Search::reopen;
1;
use PublicInbox::InboxWritable;
use PublicInbox::ContentHash qw(content_hash);
use PublicInbox::Eml;
-use File::Spec;
use PublicInbox::DS qw(now);
use DBI qw(:sql_types); # SQL_BLOB
sub new {
my (undef, $dir, $opt) = @_;
- $dir = File::Spec->canonpath($dir);
my $l = $opt->{indexlevel} // 'full';
$l !~ $PublicInbox::SearchIdx::INDEXLEVELS and
die "invalid indexlevel=$l\n";
}, __PACKAGE__;
$self->{shards} = $self->count_shards || nproc_shards($opt->{creat});
my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3");
- $oidx->{-no_fsync} = 1 if $opt->{-no_fsync};
+ $self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync};
$self->{oidx} = $oidx;
$self
}
sub attach_inbox {
my ($self, $ibx) = @_;
- my $key = $ibx->eidx_key;
- if (!$ibx->over || !$ibx->mm) {
- warn "W: skipping $key (unindexed)\n";
- return;
- }
- if (!defined($ibx->uidvalidity)) {
- warn "W: skipping $key (no UIDVALIDITY)\n";
- return;
- }
- my $ibxdir = File::Spec->canonpath($ibx->{inboxdir});
- if ($ibxdir ne $ibx->{inboxdir}) {
- warn "W: `$ibx->{inboxdir}' canonicalized to `$ibxdir'\n";
- $ibx->{inboxdir} = $ibxdir;
- }
- $self->{ibx_map}->{$key} //= do {
+ $self->{ibx_map}->{$ibx->eidx_key} //= do {
push @{$self->{ibx_list}}, $ibx;
$ibx;
}
$heads;
}
+sub _ibx_index_reject ($) {
+ my ($ibx) = @_;
+ $ibx->mm // return 'unindexed, no msgmap.sqlite3';
+ $ibx->uidvalidity // return 'no UIDVALIDITY';
+ $ibx->over // return 'unindexed, no over.sqlite3';
+ undef;
+}
+
sub _sync_inbox ($$$) {
my ($self, $sync, $ibx) = @_;
+ my $ekey = $ibx->eidx_key;
+ if (defined(my $err = _ibx_index_reject($ibx))) {
+ return "W: skipping $ekey ($err)";
+ }
$sync->{ibx} = $ibx;
$sync->{nr} = \(my $nr = 0);
my $v = $ibx->version;
- my $ekey = $ibx->eidx_key;
if ($v == 2) {
$sync->{epoch_max} = $ibx->max_git_epoch // return;
sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable
} elsif ($v == 1) {
my $uv = $ibx->uidvalidity;
my $lc = $self->{oidx}->eidx_meta("lc-v1:$ekey//$uv");
- my $head = $ibx->mm->last_commit;
- unless (defined $head) {
- warn "E: $ibx->{inboxdir} is not indexed\n";
- return;
- }
+ my $head = $ibx->mm->last_commit //
+ return "E: $ibx->{inboxdir} is not indexed";
my $stk = prepare_stack($sync, $lc ? "$lc..$head" : $head);
my $unit = { stack => $stk, git => $ibx->git };
push @{$sync->{todo}}, $unit;
} else {
- warn "E: $ekey unsupported inbox version (v$v)\n";
- return;
+ return "E: $ekey unsupported inbox version (v$v)";
}
for my $unit (@{delete($sync->{todo}) // []}) {
last if $sync->{quit};
}
$self->{midx}->index_ibx($ibx) unless $sync->{quit};
$ibx->git->cleanup; # done with this inbox, now
+ undef;
}
sub gc_unref_doc ($$$$) {
$self->{ibx_list}->[$pos] // die "BUG: ibx for $smsg->{blob} not mapped"
}
+sub _fd_constrained ($) {
+ my ($self) = @_;
+ $self->{-fd_constrained} //= do {
+ my $soft;
+ if (eval { require BSD::Resource; 1 }) {
+ my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
+ ($soft, undef) = BSD::Resource::getrlimit($NOFILE);
+ } else {
+ chomp($soft = `sh -c 'ulimit -n'`);
+ }
+ if (defined($soft)) {
+ my $want = scalar(@{$self->{ibx_list}}) + 64; # estimate
+ my $ret = $want > $soft;
+ if ($ret) {
+ warn <<EOF;
+RLIMIT_NOFILE=$soft insufficient (want: $want), will close DB handles early
+EOF
+ }
+ $ret;
+ } else {
+ warn "Unable to determine RLIMIT_NOFILE: $@\n";
+ 1;
+ }
+ };
+}
+
sub _reindex_finalize ($$$) {
my ($req, $smsg, $eml) = @_;
my $sync = $req->{sync};
my $x = pop(@$ary) // die "BUG: #$docid {by_chash} empty";
$x->{num} = delete($x->{xnum}) // die '{xnum} unset';
$ibx = _ibx_for($self, $sync, $x);
- my $e = $ibx->over->get_art($x->{num});
- $e->{blob} eq $x->{blob} or die <<EOF;
+ if (my $over = $ibx->over) {
+ my $e = $over->get_art($x->{num});
+ $e->{blob} eq $x->{blob} or die <<EOF;
$x->{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num});
EOF
- push @todo, $ibx, $e;
+ push @todo, $ibx, $e;
+ $over->dbh_close if _fd_constrained($self);
+ } else {
+ die "$ibx->{inboxdir}: over.sqlite3 unusable: $!\n";
+ }
}
undef $by_chash;
while (my ($ibx, $e) = splice(@todo, 0, 2)) {
my $dbh = $self->{oidx}->dbh;
my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return;
${$sync->{nr}} = 0;
- $sync->{-regen_fmt} = "%u/$tot\n";
+ local $sync->{-regen_fmt} = "%u/$tot\n";
my $pr = $sync->{-opt}->{-progress};
if ($pr) {
my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq');
my $msgs;
my $pr = $sync->{-opt}->{-progress};
my $ekey = $ibx->eidx_key;
- $sync->{-regen_fmt} = "$ekey checking unseen %u/".$ibx->over->max."\n";
+ local $sync->{-regen_fmt} =
+ "$ekey checking unseen %u/".$ibx->over->max."\n";
${$sync->{nr}} = 0;
while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) {
my $pr = $sync->{-opt}->{-progress};
my $fetching;
my $ekey = $ibx->eidx_key;
- $sync->{-regen_fmt} =
+ local $sync->{-regen_fmt} =
"$ekey check stale/missing %u/".$ibx->over->max."\n";
${$sync->{nr}} = 0;
do {
sub _reindex_inbox ($$$) {
my ($self, $sync, $ibx) = @_;
- local $self->{current_info} = $ibx->eidx_key;
- _reindex_check_unseen($self, $sync, $ibx);
- _reindex_check_stale($self, $sync, $ibx) unless $sync->{quit};
+ my $ekey = $ibx->eidx_key;
+ local $self->{current_info} = $ekey;
+ if (defined(my $err = _ibx_index_reject($ibx))) {
+ warn "W: cannot reindex $ekey ($err)\n";
+ } else {
+ _reindex_check_unseen($self, $sync, $ibx);
+ _reindex_check_stale($self, $sync, $ibx) unless $sync->{quit};
+ }
delete @$ibx{qw(over mm search git)}; # won't need these for a bit
}
eidxq_process($self, $sync) unless $sync->{quit};
}
+sub sync_inbox {
+ my ($self, $sync, $ibx) = @_;
+ my $err = _sync_inbox($self, $sync, $ibx);
+ delete @$ibx{qw(mm over)};
+ warn $err, "\n" if defined($err);
+}
+
sub eidx_sync { # main entry point
my ($self, $opt) = @_;
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+ my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
local $self->{current_info} = '';
local $SIG{__WARN__} = sub {
$warn_cb->($self->{current_info}, ': ', @_);
$ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key);
}
if (delete($opt->{reindex})) {
- $sync->{checkpoint_unlocks} = 1;
+ local $sync->{checkpoint_unlocks} = 1;
eidx_reindex($self, $sync);
}
# don't use $_ here, it'll get clobbered by reindex_checkpoint
- for my $ibx (@{$self->{ibx_list}}) {
- last if $sync->{quit};
- _sync_inbox($self, $sync, $ibx);
+ if ($opt->{scan} // 1) {
+ for my $ibx (@{$self->{ibx_list}}) {
+ last if $sync->{quit};
+ sync_inbox($self, $sync, $ibx);
+ }
}
$self->{oidx}->rethread_done($opt) unless $sync->{quit};
eidxq_process($self, $sync) unless $sync->{quit};
eidxq_release($self);
- PublicInbox::V2Writable::done($self);
+ done($self);
+ $sync; # for eidx_watch
}
sub update_last_commit { # overrides V2Writable
PublicInbox::V2Writable::write_alternates($info_dir, $mode, $o);
}
$self->parallel_init($self->{indexlevel});
- $self->umask_prepare;
$self->with_umask(\&_idx_init, $self, $opt);
$self->{oidx}->begin_lazy;
$self->{oidx}->eidx_prep;
$self->{midx}->begin_txn;
}
+sub _watch_commit { # PublicInbox::DS::add_timer callback
+ my ($self) = @_;
+ delete $self->{-commit_timer};
+ eidxq_process($self, $self->{-watch_sync});
+ eidxq_release($self);
+ delete local $self->{-watch_sync}->{-regen_fmt};
+ reindex_checkpoint($self, $self->{-watch_sync});
+
+ # call event_step => done unless commit_timer is armed
+ PublicInbox::DS::requeue($self);
+}
+
+sub on_inbox_unlock { # called by PublicInbox::InboxIdle
+ my ($self, $ibx) = @_;
+ my $opt = $self->{-watch_sync}->{-opt};
+ my $pr = $opt->{-progress};
+ my $ekey = $ibx->eidx_key;
+ local $0 = "sync $ekey";
+ $pr->("indexing $ekey\n") if $pr;
+ $self->idx_init($opt);
+ sync_inbox($self, $self->{-watch_sync}, $ibx);
+ $self->{-commit_timer} //= PublicInbox::DS::add_timer(
+ $opt->{'commit-interval'} // 10,
+ \&_watch_commit, $self);
+}
+
+sub eidx_reload { # -extindex --watch SIGHUP handler
+ my ($self, $idler) = @_;
+ if ($self->{cfg}) {
+ my $pr = $self->{-watch_sync}->{-opt}->{-progress};
+ $pr->('reloading ...') if $pr;
+ delete $self->{-resync_queue};
+ @{$self->{ibx_list}} = ();
+ %{$self->{ibx_map}} = ();
+ delete $self->{-watch_sync}->{id2pos};
+ my $cfg = PublicInbox::Config->new;
+ attach_config($self, $cfg);
+ $idler->refresh($cfg);
+ $pr->(" done\n") if $pr;
+ } else {
+ warn "reload not supported without --all\n";
+ }
+}
+
+sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler
+ my ($self) = @_;
+ $self->{-resync_queue} //= [ @{$self->{ibx_list}} ];
+ PublicInbox::DS::requeue($self); # trigger our ->event_step
+}
+
+sub event_step { # PublicInbox::DS::requeue callback
+ my ($self) = @_;
+ if (my $resync_queue = $self->{-resync_queue}) {
+ if (my $ibx = shift(@$resync_queue)) {
+ on_inbox_unlock($self, $ibx);
+ PublicInbox::DS::requeue($self);
+ } else {
+ delete $self->{-resync_queue};
+ _watch_commit($self);
+ }
+ } else {
+ done($self) unless $self->{-commit_timer};
+ }
+}
+
+sub eidx_watch { # public-inbox-extindex --watch main loop
+ my ($self, $opt) = @_;
+ local %SIG = %SIG;
+ for my $sig (qw(HUP USR1 TSTP QUIT INT TERM)) {
+ $SIG{$sig} = sub { warn "SIG$sig ignored while scanning\n" };
+ }
+ require PublicInbox::InboxIdle;
+ require PublicInbox::DS;
+ require PublicInbox::Syscall;
+ require PublicInbox::Sigfd;
+ my $idler = PublicInbox::InboxIdle->new($self->{cfg});
+ if (!$self->{cfg}) {
+ $idler->watch_inbox($_) for @{$self->{ibx_list}};
+ }
+ $_->subscribe_unlock(__PACKAGE__, $self) for @{$self->{ibx_list}};
+ my $pr = $opt->{-progress};
+ $pr->("performing initial scan ...\n") if $pr;
+ my $sync = eidx_sync($self, $opt); # initial sync
+ return if $sync->{quit};
+ my $oldset = PublicInbox::Sigfd::block_signals();
+ local $self->{current_info} = '';
+ my $cb = $SIG{__WARN__} || \&CORE::warn;
+ local $SIG{__WARN__} = sub { $cb->($self->{current_info}, ': ', @_) };
+ my $sig = {
+ HUP => sub { eidx_reload($self, $idler) },
+ USR1 => sub { eidx_resync_start($self) },
+ TSTP => sub { kill('STOP', $$) },
+ };
+ my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+ $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit;
+ my $sigfd = PublicInbox::Sigfd->new($sig,
+ $PublicInbox::Syscall::SFD_NONBLOCK);
+ %SIG = (%SIG, %$sig) if !$sigfd;
+ local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock
+ if (!$sigfd) {
+ # wake up every second to accept signals if we don't
+ # have signalfd or IO::KQueue:
+ PublicInbox::Sigfd::sig_setmask($oldset);
+ PublicInbox::DS->SetLoopTimeout(1000);
+ }
+ PublicInbox::DS->SetPostLoopCallback(sub { !$sync->{quit} });
+ $pr->("initial scan complete, entering event loop\n") if $pr;
+ PublicInbox::DS->EventLoop; # calls InboxIdle->event_step
+ done($self);
+}
+
no warnings 'once';
*done = \&PublicInbox::V2Writable::done;
-*umask_prepare = \&PublicInbox::InboxWritable::umask_prepare;
*with_umask = \&PublicInbox::InboxWritable::with_umask;
*parallel_init = \&PublicInbox::V2Writable::parallel_init;
*nproc_shards = \&PublicInbox::V2Writable::nproc_shards;
if (open(my $fh, '<', $f)) {
chomp($l, $c);
local $/;
- $c_src = <$fh>;
+ defined($c_src = <$fh>) or die "read $f: $!\n";
$CFG{LIBS} = $l;
$CFG{CCFLAGSEX} = $c;
last;
sub qx {
my ($self, @cmd) = @_;
my $fh = $self->popen(@cmd);
- local $/ = "\n";
- return <$fh> if wantarray;
- local $/;
- <$fh>
+ local $/ = wantarray ? "\n" : undef;
+ <$fh>;
}
# check_async and cat_async may trigger the other, so ensure they're
my $srch = $self->{ibx}->isrch or
return "$tag BAD search not available for mailbox\r\n";
my $opt = {
- mset => 2,
+ relevance => -1,
limit => UID_SLICE,
uid_range => $range_info
};
return ($self->{in}, $self->{out}) if $self->{pid};
- my (@ret, $out_r, $out_w);
+ my ($in_r, $pid, $out_r, $out_w);
pipe($out_r, $out_w) or die "pipe failed: $!";
$self->lock_acquire;
my ($git, $ref) = @$self{qw(git ref)};
local $/ = "\n";
chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref));
+ die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?;
if ($self->{path_type} ne '2/38' && $self->{tip}) {
local $/ = "\0";
my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref);
+ die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?;
chomp @t;
$self->{-tree} = { map { $_ => 1 } @t };
}
my @cmd = ('git', "--git-dir=$git->{git_dir}",
qw(fast-import --quiet --done --date-format=raw));
- my ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r });
+ ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r });
$out_w->autoflush(1);
$self->{in} = $in_r;
$self->{out} = $out_w;
$self->{pid} = $pid;
$self->{nchg} = 0;
- @ret = ($in_r, $out_w);
};
if ($@) {
$self->lock_release;
die $@;
}
- @ret;
+ ($in_r, $out_w);
}
sub wfail () { die "write to fast-import failed: $!" }
delete $opts->{feedmax};
}
$opts->{nntpserver} ||= $pi_cfg->{'publicinbox.nntpserver'};
- my $dir = $opts->{inboxdir};
- if (defined $dir && -f "$dir/inbox.lock") {
- $opts->{version} = 2;
- }
# allow any combination of multi-line or comma-delimited hide entries
my $hide = {};
bless $opts, $class;
}
-sub version { $_[0]->{version} // 1 }
+sub version {
+ $_[0]->{version} //= -f "$_[0]->{inboxdir}/inbox.lock" ? 2 : 1
+}
sub git_epoch {
my ($self, $epoch) = @_; # v2-only, callers always supply $epoch
return unless -d $git_dir;
my $g = PublicInbox::Git->new($git_dir);
$g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
- # no cleanup needed, we never cat-file off this, only clone
+ # caller must manually cleanup when done
$g;
};
}
sub try_cat {
my ($path) = @_;
- my $rv = '';
- if (open(my $fh, '<', $path)) {
- local $/;
- $rv = <$fh>;
- }
- $rv;
+ open(my $fh, '<', $path) or return '';
+ local $/;
+ <$fh> // '';
}
sub cat_desc ($) {
my ($self) = @_;
check_inodes($self);
my $subs = $self->{unlock_subs} or return;
- for (values %$subs) {
- eval { $_->on_inbox_unlock($self) };
+ for my $obj (values %$subs) {
+ eval { $obj->on_inbox_unlock($self) };
warn "E: $@ ($self->{inboxdir})\n" if $@;
}
}
package PublicInbox::InboxIdle;
use strict;
use parent qw(PublicInbox::DS);
-use Cwd qw(abs_path);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
my $IN_MODIFY = 0x02; # match Linux inotify
my $ino_cls;
sub in2_arm ($$) { # PublicInbox::Config::each_inbox callback
my ($ibx, $self) = @_;
- my $dir = abs_path($ibx->{inboxdir});
- if (!defined($dir)) {
- warn "W: $ibx->{inboxdir} not watched: $!\n";
- return;
- }
+ my $dir = $ibx->{inboxdir};
my $inot = $self->{inot};
my $cur = $self->{pathmap}->{$dir} //= [];
+ my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock');
# transfer old subscriptions to the current inbox, cancel the old watch
- if (my $old_ibx = $cur->[0]) {
+ my $old_ibx = $cur->[0];
+ $cur->[0] = $ibx;
+ if ($old_ibx) {
$ibx->{unlock_subs} and
die "BUG: $dir->{unlock_subs} should not exist";
$ibx->{unlock_subs} = $old_ibx->{unlock_subs};
+
+ # Linux::Inotify2::Watch::name matches if watches are the
+ # same, no point in replacing a watch of the same name
+ if ($cur->[1]->name eq $lock) {
+ $self->{on_unlock}->{$lock} = $ibx;
+ return;
+ }
+ # rare, name changed (v1 inbox converted to v2)
$cur->[1]->cancel; # Linux::Inotify2::Watch::cancel
}
- $cur->[0] = $ibx;
- my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock');
if (my $w = $cur->[1] = $inot->watch($lock, $IN_MODIFY)) {
$self->{on_unlock}->{$w->name} = $ibx;
} else {
warn "E: ".ref($inot)."->watch($lock, IN_MODIFY) failed: $!\n";
+ if ($!{ENOSPC} && $^O eq 'linux') {
+ warn <<"";
+I: consider increasing /proc/sys/fs/inotify/max_user_watches
+
+ }
}
# TODO: detect deleted packs (and possibly other files)
$pi_cfg->each_inbox(\&in2_arm, $self);
}
+# internal API for ease-of-use
+sub watch_inbox { in2_arm($_[1], $_[0]) };
+
sub new {
my ($class, $pi_cfg) = @_;
my $self = bless {}, $class;
$self->{inot} = $inot;
$self->{pathmap} = {}; # inboxdir => [ ibx, watch1, watch2, watch3...]
$self->{on_unlock} = {}; # lock path => ibx
- refresh($self, $pi_cfg);
+ refresh($self, $pi_cfg) if $pi_cfg;
PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
$self;
}
my @events = $self->{inot}->read; # Linux::Inotify2::read
my $on_unlock = $self->{on_unlock};
for my $ev (@events) {
- if (my $ibx = $on_unlock->{$ev->fullname}) {
+ my $fn = $ev->fullname // next; # cancelled
+ if (my $ibx = $on_unlock->{$fn}) {
$ibx->on_unlock;
}
}
require PublicInbox::Msgmap;
my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create
$sidx->begin_txn_lazy;
+ my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
if (defined $skip_artnum) {
- my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
$mm->{dbh}->begin_work;
$mm->skip_artnum($skip_artnum);
$mm->{dbh}->commit;
}
+ undef $mm; # ->created_at set
$sidx->commit_txn_lazy;
} else {
open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or
if ($self->version == 1) {
my $dir = assert_usable_dir($self);
PublicInbox::Import::init_bare($dir);
- $self->umask_prepare;
$self->with_umask(\&_init_v1, $self, $skip_artnum);
} else {
my $v2w = importer($self);
sub with_umask {
my ($self, $cb, @arg) = @_;
- my $old = umask $self->{umask};
+ my $old = umask($self->{umask} //= umask_prepare($self));
my $rv = eval { $cb->(@arg) };
my $err = $@;
umask $old;
sub umask_prepare {
my ($self) = @_;
my $perm = _git_config_perm($self);
- my $umask = _umask_for($perm);
- $self->{umask} = $umask;
+ _umask_for($perm);
}
sub cleanup ($) {
# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
sub warn_ignore_cb {
- my $cb = $SIG{__WARN__} // sub { print STDERR @_ };
+ my $cb = $SIG{__WARN__} // \&CORE::warn;
sub {
return if warn_ignore(@_);
$cb->(@_);
my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset);
my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
my $qmarks = join(',', map { '?' } @$docids);
- if ($opt && ($opt->{mset} // 0) == 2) { # opt->{mset} = 2 was used
+ if ($opt && ($opt->{relevance} // 0) == -1) { # -1 => ENQ_ASCENDING
my $range = '';
my @r;
if (my $r = $opt->{uid_range}) {
}
if (scalar keys %order) {
warn "W: $self->{es}->{topdir} #",
- join(', #', sort keys %order),
+ join(', ', sort { $a <=> $b } keys %order),
" not mapped to `$self->{eidx_key}'\n";
warn "W: $self->{es}->{topdir} may need to be reindexed\n";
@xnums = grep { defined } @xnums;
}
if (scalar keys %order) {
warn "W: $ibx->{inboxdir} #",
- join(', #', sort keys %order),
+ join(', ', sort { $a <=> $b } keys %order),
" no longer valid\n";
warn "W: $self->{es}->{topdir} may need to be reindexed\n";
}
'Content-Length', bytes::length($out) ], [ $out ] ]
}
+sub per_inbox {
+ my ($ctx) = @_;
+ # only one inbox, slow is probably OK
+ slow_manifest_add($ctx, $ctx->{ibx});
+ psgi_triple($ctx);
+}
+
1;
my $over = $ctx->{ibx}->over or
return PublicInbox::WWW::need($ctx, 'Overview');
- my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid
+ my $qopts = $ctx->{qopts} = { relevance => -1 }; # ORDER BY docid ASC
$qopts->{thread} = 1 if $q->{t};
my $mset = $srch->mset($q_string, $qopts);
$qopts->{offset} = $mset->size or
use File::Path ();
use PublicInbox::MiscSearch;
use PublicInbox::Config;
+my $json;
sub new {
my ($class, $eidx) = @_;
nodatacow_dir($mi_dir);
my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN;
$flags |= $PublicInbox::SearchIdx::DB_NO_SYNC if $eidx->{-no_fsync};
+ $json //= PublicInbox::Config::json();
bless {
mi_dir => $mi_dir,
flags => $flags,
$xdb->delete_document($_) for @drop; # just in case
my $doc = $PublicInbox::Search::X{Document}->new;
+ term_generator($self)->set_document($doc);
- # allow sorting by modified
+ # allow sorting by modified and uidvalidity (created at)
add_val($doc, $PublicInbox::MiscSearch::MODIFIED, $ibx->modified);
+ add_val($doc, $PublicInbox::MiscSearch::UIDVALIDITY, $ibx->uidvalidity);
- $doc->add_boolean_term('Q'.$eidx_key);
- $doc->add_boolean_term('T'.'inbox');
- term_generator($self)->set_document($doc);
+ $doc->add_boolean_term('Q'.$eidx_key); # uniQue id
+ $doc->add_boolean_term('T'.'inbox'); # Type
+
+ if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) {
+ $doc->add_boolean_term('T'.'newsgroup'); # additional Type
+ }
+
+ # force reread from disk, {description} could be loaded from {misc}
+ delete $ibx->{description};
+ my $desc = $ibx->description;
# description = S/Subject (or title)
# address = A/Author
- index_text($self, $ibx->description, 1, 'S');
+ index_text($self, $desc, 1, 'S');
+ index_text($self, $ibx->{name}, 1, 'XNAME');
my %map = (
address => 'A',
listid => 'XLISTID',
index_text($self, $v, 1, $pfx);
}
}
- index_text($self, $ibx->{name}, 1, 'XNAME');
my $data = {};
if (defined(my $max = $ibx->max_git_epoch)) { # v2
- my $desc = $ibx->description;
my $pfx = "/$ibx->{name}/git/";
for my $epoch (0..$max) {
my $git = $ibx->git_epoch($epoch) or return;
$ent->{git_dir} = $ibx->{inboxdir};
$data->{"/$ibx->{name}"} = $ent;
}
- $doc->set_data(PublicInbox::Config::json()->encode($data));
+ $doc->set_data($json->encode($data));
if (defined $docid) {
$xdb->replace_document($docid, $doc);
} else {
package PublicInbox::MiscSearch;
use strict;
use v5.10.1;
-use PublicInbox::Search qw(retry_reopen);
+use PublicInbox::Search qw(retry_reopen int_val);
+my $json;
# Xapian value columns:
our $MODIFIED = 0;
+our $UIDVALIDITY = 1; # (created time)
# avoid conflicting with message Search::prob_prefix for UI/UX reasons
my %PROB_PREFIX = (
sub new {
my ($class, $dir) = @_;
+ PublicInbox::Search::load_xapian();
+ $json //= PublicInbox::Config::json();
bless {
xdb => $PublicInbox::Search::X{Database}->new($dir)
}, $class;
sub mset {
my ($self, $qs, $opt) = @_;
$opt ||= {};
+ reopen($self);
my $qp = $self->{qp} //= mi_qp_new($self);
$qs = 'type:inbox' if $qs eq '';
my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS);
sub ibx_data_once {
my ($self, $ibx) = @_;
my $xdb = $self->{xdb};
- my $eidx_key = $ibx->eidx_key; # may be {inboxdir}, so private
- my $head = $xdb->postlist_begin('Q'.$eidx_key);
- my $tail = $xdb->postlist_end('Q'.$eidx_key);
+ my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private
+ my $head = $xdb->postlist_begin($term);
+ my $tail = $xdb->postlist_end($term);
if ($head != $tail) {
my $doc = $xdb->get_document($head->get_docid);
+ $ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY);
+ $ibx->{-modified} = int_val($doc, $MODIFIED);
$doc->get_data;
} else {
undef;
retry_reopen($self, \&ibx_data_once, $ibx);
}
+sub ibx_cache_load {
+ my ($doc, $cache) = @_;
+ my $end = $doc->termlist_end;
+ my $cur = $doc->termlist_begin;
+ $cur->skip_to('Q');
+ return if $cur == $end;
+ my $eidx_key = $cur->get_termname;
+ $eidx_key =~ s/\AQ// or return; # expired
+ my $ce = $cache->{$eidx_key} = {};
+ $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
+ $ce->{-modified} = int_val($doc, $MODIFIED);
+ $ce->{description} = do {
+ # extract description from manifest.js.gz epoch description
+ my $d;
+ my $data = $json->decode($doc->get_data);
+ for (values %$data) {
+ $d = $_->{description} // next;
+ $d =~ s/ \[epoch [0-9]+\]\z// or next;
+ last;
+ }
+ $d;
+ }
+}
+
+sub _nntpd_cache_load { # retry_reopen callback
+ my ($self) = @_;
+ my $opt = { limit => $self->{xdb}->get_doccount * 10, relevance => -1 };
+ my $mset = mset($self, 'type:newsgroup type:inbox', $opt);
+ my $cache = {};
+ for my $it ($mset->items) {
+ ibx_cache_load($it->get_document, $cache);
+ }
+ $cache
+}
+
+# returns { newsgroup => $cache_entry } mapping, $cache_entry contains
+# anything which may trigger seeks at startup, currently: description,
+# -modified, and uidvalidity.
+sub nntpd_cache_load {
+ my ($self) = @_;
+ retry_reopen($self, \&_nntpd_cache_load);
+}
+
+no warnings 'once';
+*reopen = \&PublicInbox::Search::reopen;
+
1;
my ($self, $sig) = @_;
my $pi_cfg = $sig ? PublicInbox::Config->new : $self->{pi_cfg};
my $groups = $pi_cfg->{-by_newsgroup}; # filled during each_inbox
+ my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {};
$pi_cfg->each_inbox(sub {
my ($ibx) = @_;
my $ngname = $ibx->{newsgroup} // return;
- if ($ibx->nntp_usable) {
+ my $ce = $cache->{$ngname};
+ if (($ce and (%$ibx = (%$ibx, %$ce))) || $ibx->nntp_usable) {
# only valid if msgmap and over works
# preload to avoid fragmentation:
$ibx->description;
sub create {
my ($self) = @_;
- unless (-r $self->{filename}) {
+ my $fn = $self->{filename} // do {
+ Carp::confess('BUG: no {filename}') unless $self->{dbh};
+ return;
+ };
+ unless (-r $fn) {
require File::Path;
require File::Basename;
- File::Path::mkpath(File::Basename::dirname($self->{filename}));
+ File::Path::mkpath(File::Basename::dirname($fn));
}
# create the DB:
PublicInbox::Over::dbh($self);
package PublicInbox::Search;
use strict;
use parent qw(Exporter);
-our @EXPORT_OK = qw(retry_reopen);
+our @EXPORT_OK = qw(retry_reopen int_val);
use List::Util qw(max);
# values for searching, changing the numeric value breaks
our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query);
our $Xap; # 'Search::Xapian' or 'Xapian'
our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
-our $ENQ_ASCENDING;
+
+# ENQ_DESCENDING and ENQ_ASCENDING weren't in SWIG Xapian.pm prior to 1.4.16,
+# let's hope the ABI is stable
+our $ENQ_DESCENDING = 0;
+our $ENQ_ASCENDING = 1;
sub load_xapian () {
return 1 if defined $Xap;
'NumberRangeProcessor' : 'NumberValueRangeProcessor');
$X{$_} = $Xap.'::'.$_ for (keys %X);
- # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm,
- # so lets hope this part of the ABI is stable because it's
- # just an integer:
- $ENQ_ASCENDING = $x eq 'Xapian' ?
- 1 : Search::Xapian::ENQ_ASCENDING();
-
*sortable_serialise = $x.'::sortable_serialise';
+ *sortable_unserialise = $x.'::sortable_unserialise';
# n.b. FLAG_PURE_NOT is expensive not suitable for a public
# website as it could become a denial-of-service vector
# FLAG_PHRASE also seems to cause performance problems chert
$opts ||= {};
my $qp = $self->{qp} //= qparse_new($self);
my $query = $qp->parse_query($query_string, $self->{qp_flags});
- $opts->{relevance} = 1 unless exists $opts->{relevance};
_do_enquire($self, $query, $opts);
}
$enquire->set_query($query);
$opts ||= {};
my $desc = !$opts->{asc};
- if (($opts->{mset} || 0) == 2) { # mset == 2: ORDER BY docid/UID
+ my $rel = $opts->{relevance} // 0;
+ if ($rel == -1) { # ORDER BY docid/UID
+ $enquire->set_weighting_scheme($X{BoolWeight}->new);
$enquire->set_docid_order($ENQ_ASCENDING);
+ } elsif ($rel == 0) {
+ $enquire->set_sort_by_value_then_relevance(TS, $desc);
+ } elsif ($rel == -2) {
$enquire->set_weighting_scheme($X{BoolWeight}->new);
- } elsif ($opts->{relevance}) {
+ $enquire->set_docid_order($ENQ_DESCENDING);
+ } else { # rel > 0
$enquire->set_sort_by_relevance_then_value(TS, $desc);
- } else {
- $enquire->set_sort_by_value_then_relevance(TS, $desc);
}
# `mairix -t / --threads' or JMAP collapseThreads
\@ret;
}
+sub int_val ($$) {
+ my ($doc, $col) = @_;
+ my $val = $doc->get_value($col) or return; # undefined is '' in Xapian
+ sortable_unserialise($val) + 0; # PV => IV conversion
+}
+
1;
$self->{-set_skip_docdata_once} = 1;
$self->{-skip_docdata} = 1;
}
- $ibx->umask_prepare;
if ($version == 1) {
$self->{lock_path} = "$inboxdir/ssoma.lock";
my $dir = $self->xdir;
}
eval 'require '.$X->{WritableDatabase} or die;
*sortable_serialise = $xap.'::sortable_serialise';
- *sortable_unserialise = $xap.'::sortable_unserialise';
$DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
$DB_OPEN = eval($xap.'::DB_OPEN()');
my $ver = (eval($xap.'::major_version()') << 16) |
$self->{xdb}->replace_document($docid, $doc) if $replace;
}
-sub get_val ($$) {
- my ($doc, $col) = @_;
- sortable_unserialise($doc->get_value($col));
-}
-
sub smsg_from_doc ($) {
my ($doc) = @_;
my $data = $doc->get_data or return;
my $smsg = bless {}, 'PublicInbox::Smsg';
- $smsg->{ts} = get_val($doc, PublicInbox::Search::TS());
- my $dt = get_val($doc, PublicInbox::Search::DT());
+ $smsg->{ts} = int_val($doc, PublicInbox::Search::TS());
+ my $dt = int_val($doc, PublicInbox::Search::DT());
my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt);
$smsg->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
$smsg->load_from_data($data);
$smsg->{num} = index_mm($self, $eml, $oid, $sync) or
die "E: could not generate NNTP article number for $oid";
add_message($self, $eml, $smsg, $sync);
+ ++$self->{nidx};
my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
${$sync->{latest_cmt}} = $cur_cmt;
}
if (defined(my $cur_cmt = $sync->{cur_cmt})) {
${$sync->{latest_cmt}} = $cur_cmt;
}
+ ++$self->{nidx};
}
sub with_umask {
our $epoll_wait_events;
our $epoll_wait_size = 0;
sub epoll_wait_mod4 {
- # resize our static buffer if requested size is bigger than we've ever done
- if ($_[1] > $epoll_wait_size) {
- $epoll_wait_size = $_[1];
- $epoll_wait_events = "\0" x 12 x $epoll_wait_size;
- }
- my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
- for (0..$ct-1) {
- @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8));
- }
- return $ct;
+ my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+ # resize our static buffer if maxevents bigger than we've ever done
+ if ($maxevents > $epoll_wait_size) {
+ $epoll_wait_size = $maxevents;
+ vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0;
+ }
+ @$events = ();
+ my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+ $maxevents, $timeout_msec);
+ for (0..$ct - 1) {
+ # 12-byte struct epoll_event
+ # 4 bytes uint32_t events mask (skipped, useless to us)
+ # 8 bytes: epoll_data_t union (first 4 bytes are the fd)
+ # So we skip the first 4 bytes and take the middle 4:
+ $events->[$_] = unpack('L', substr($epoll_wait_events,
+ 12 * $_ + 4, 4));
+ }
}
sub epoll_wait_mod8 {
- # resize our static buffer if requested size is bigger than we've ever done
- if ($_[1] > $epoll_wait_size) {
- $epoll_wait_size = $_[1];
- $epoll_wait_events = "\0" x 16 x $epoll_wait_size;
- }
- my $ct;
- if ($no_deprecated) {
- $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef);
- } else {
- $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
- }
- for (0..$ct-1) {
- # 16 byte epoll_event structs, with format:
- # 4 byte mask [idx 1]
- # 4 byte padding (we put it into idx 2, useless)
- # 8 byte data (first 4 bytes are fd, into idx 0)
- @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12));
- }
- return $ct;
+ my ($epfd, $maxevents, $timeout_msec, $events) = @_;
+
+ # resize our static buffer if maxevents bigger than we've ever done
+ if ($maxevents > $epoll_wait_size) {
+ $epoll_wait_size = $maxevents;
+ vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0;
+ }
+ @$events = ();
+ my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
+ $maxevents, $timeout_msec,
+ $no_deprecated ? undef : ());
+ for (0..$ct - 1) {
+ # 16-byte struct epoll_event
+ # 4 bytes uint32_t events mask (skipped, useless to us)
+ # 4 bytes padding (skipped, useless)
+ # 8 bytes epoll_data_t union (first 4 bytes are the fd)
+ # So skip the first 8 bytes, take 4, and ignore the last 4:
+ $events->[$_] = unpack('L', substr($epoll_wait_events,
+ 16 * $_ + 8, 4));
+ }
}
sub signalfd ($$$) {
use Crypt::CBC;
use Plack::Util;
use MIME::Base64 qw(decode_base64url);
-my $CODE_URL = 'https://public-inbox.org/public-inbox.git';
+my @CODE_URL = qw(http://ou63pmih66umazou.onion/public-inbox.git
+ https://public-inbox.org/public-inbox.git);
my @CT_HTML = ('Content-Type', 'text/html; charset=UTF-8');
sub new {
my $unsubscribe = $opt{unsubscribe} or
die "`unsubscribe' callback not given\n";
+ my $code_url = $opt{code_url} || \@CODE_URL;
+ $code_url = [ $code_url ] if ref($code_url) ne 'ARRAY';
bless {
pi_cfg => $opt{pi_config}, # PublicInbox::Config
owner_email => $opt{owner_email},
cipher => $cipher,
unsubscribe => $unsubscribe,
contact => qq(<a\nhref="mailto:$e">$e</a>),
- code_url => $opt{code_url} || $CODE_URL,
+ code_url => $code_url,
confirm => $opt{confirm},
}, $class;
}
"<html><head><title>$title</title></head><body><pre>".
join("\n", "<b>$title</b>\n", @body) . '</pre><hr>'.
"<pre>This page is available under AGPL-3.0+\n" .
- "git clone $self->{code_url}\n" .
+ join('', map { "git clone $_\n" } @{$self->{code_url}}) .
qq(Email $self->{contact} if you have any questions).
'</pre></body></html>'
] ];
die "$dir does not exist\n";
}
}
- $v2ibx->umask_prepare;
-
my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION;
my $self = {
ibx => $v2ibx,
$ibx->git->cleanup;
parallel_init($self, $ibx->{indexlevel});
- $ibx->umask_prepare;
$ibx->with_umask(\&_idx_init, $self, $opt);
}
$self->done; # release lock
}
- if (my $pr = $sync->{-opt}->{-progress}) {
+ if (my $pr = $sync->{-regen_fmt} ? $sync->{-opt}->{-progress} : undef) {
$pr->(sprintf($sync->{-regen_fmt}, ${$sync->{nr}}));
}
$mm_tmp->atfork_parent if $mm_tmp;
}
+sub index_finalize ($$) {
+ my ($arg, $index) = @_;
+ ++$arg->{self}->{nidx};
+ if (defined(my $cur = $arg->{cur_cmt})) {
+ ${$arg->{latest_cmt}} = $cur;
+ } elsif ($index) {
+ die 'BUG: {cur_cmt} missing';
+ } # else { unindexing @leftovers doesn't set {cur_cmt}
+}
+
sub index_oid { # cat_async callback
my ($bref, $oid, $type, $size, $arg) = @_;
- return if is_bad_blob($oid, $type, $size, $arg->{oid});
+ is_bad_blob($oid, $type, $size, $arg->{oid}) and
+ return index_finalize($arg, 1); # size == 0 purged returns here
my $self = $arg->{self};
local $self->{current_info} = "$self->{current_info} $oid";
- return if $size == 0; # purged
my ($num, $mid0);
my $eml = PublicInbox::Eml->new($$bref);
my $mids = mids($eml);
if (do_idx($self, $bref, $eml, $smsg)) {
${$arg->{need_checkpoint}} = 1;
}
- ${$arg->{latest_cmt}} = $arg->{cur_cmt} // die 'BUG: {cur_cmt} missing';
+ index_finalize($arg, 1);
}
# only update last_commit for $i on reindex iff newer than current
-d $git_dir or next; # missing epochs are fine
my $git = PublicInbox::Git->new($git_dir);
my $unit = { git => $git, epoch => $i };
+ my $tip;
if ($reindex_heads) {
- $head = $reindex_heads->[$i] or next;
+ $tip = $head = $reindex_heads->[$i] or next;
+ } else {
+ $tip = $git->qx(qw(rev-parse -q --verify), $head);
+ next if $?; # new repo
+ chomp $tip;
}
- chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head));
- next if $?; # new repo
-
my $range = log_range($sync, $unit, $tip) or next;
# can't use 'rev-list --count' if we use --diff-filter
$pr->("$pfx $i.git counting $range ... ") if $pr;
}
sub unindex_oid ($$;$) { # git->cat_async callback
- my ($bref, $oid, $type, $size, $sync) = @_;
- return if is_bad_blob($oid, $type, $size, $sync->{oid});
- my $self = $sync->{self};
+ my ($bref, $oid, $type, $size, $arg) = @_;
+ is_bad_blob($oid, $type, $size, $arg->{oid}) and
+ return index_finalize($arg, 0);
+ my $self = $arg->{self};
local $self->{current_info} = "$self->{current_info} $oid";
- my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
+ my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef;
my $mm = $self->{mm};
my $mids = mids(PublicInbox::Eml->new($bref));
undef $$bref;
}
unindex_oid_aux($self, $oid, $mid);
}
+ index_finalize($arg, 0);
}
sub git { $_[0]->{ibx}->git }
$opt //= {};
return xapian_only($self, $opt) if $opt->{xapian_only};
- my $pr = $opt->{-progress};
my $epoch_max;
- my $latest = $self->{ibx}->git_dir_latest(\$epoch_max);
- return unless defined $latest;
+ my $latest = $self->{ibx}->git_dir_latest(\$epoch_max) // return;
+ if ($opt->{'fast-noop'}) { # nanosecond (st_ctim) comparison
+ use Time::HiRes qw(stat);
+ if (my @mm = stat("$self->{ibx}->{inboxdir}/msgmap.sqlite3")) {
+ my $c = $mm[10]; # 10 = ctime (nsec NV)
+ my @hd = stat("$latest/refs/heads");
+ my @pr = stat("$latest/packed-refs");
+ return if $c > ($hd[10] // 0) && $c > ($pr[10] // 0);
+ }
+ }
+ my $pr = $opt->{-progress};
my $seq = $opt->{sequential_shard};
my $art_beg; # the NNTP article number we start xapian_only at
my $idxlevel = $self->{ibx}->{indexlevel};
my $r404 = invalid_inbox($ctx, $inbox);
return $r404 if $r404;
require PublicInbox::ManifestJsGz;
- PublicInbox::ManifestJsGz->response($ctx);
+ PublicInbox::ManifestJsGz::per_inbox($ctx);
}
sub get_attach {
warn "unmappable dir: $1\n";
return;
}
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+ my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
local $SIG{__WARN__} = sub {
my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
$warn_cb->($pfx, "path: $path\n", @_);
my $key = $req;
$key =~ s/\.PEEK//;
my ($uids, $batch);
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+ my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
local $SIG{__WARN__} = sub {
my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
$batch //= '?';
$beg = $l_art + 1;
warn "I: $url fetching ARTICLE $beg..$end\n";
- my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
+ my $warn_cb = $SIG{__WARN__} || \&CORE::warn;
my ($err, $art);
local $SIG{__WARN__} = sub {
my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
sub response {
my ($class, $ctx) = @_;
bless $ctx, $class;
+ if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
+ $ALL->misc->reopen;
+ }
my $re = $ctx->url_regexp or return $ctx->psgi_triple;
my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
\&list_match_i, $re, $ctx);
use bytes (); # length
use PublicInbox::Hval qw(ascii_html prurl ts2str);
our $TOR_URL = 'https://www.torproject.org/';
-our $CODE_URL = 'https://public-inbox.org/public-inbox.git';
+our $CODE_URL = [ qw(http://ou63pmih66umazou.onion/public-inbox.git
+ https://public-inbox.org/public-inbox.git) ];
sub base_url ($) {
my $ctx = shift;
sub coderepos ($) {
my ($ctx) = @_;
- my $ibx = $ctx->{ibx};
+ my $cr = $ctx->{ibx}->{coderepo} // return ();
+ my $cfg = $ctx->{www}->{pi_cfg};
+ my $upfx = ($ctx->{-upfx} // ''). '../';
my @ret;
- if (defined(my $cr = $ibx->{coderepo})) {
- my $cfg = $ctx->{www}->{pi_cfg};
- my $env = $ctx->{env};
- for my $cr_name (@$cr) {
- my $urls = $cfg->{"coderepo.$cr_name.cgiturl"};
- if ($urls) {
- $ret[0] //= <<EOF;
+ for my $cr_name (@$cr) {
+ my $urls = $cfg->{"coderepo.$cr_name.cgiturl"} // next;
+ $ret[0] //= <<EOF;
code repositories for the project(s) associated with this inbox:
EOF
- $ret[0] .= "\n\t".prurl($env, $_) for @$urls;
- }
+ for (@$urls) {
+ # relative or absolute URL?, prefix relative "foo.git"
+ # with appropriate number of "../"
+ my $u = m!\A(?:[a-z\+]+:)?//! ? $_ : $upfx.$_;
+ $u = ascii_html(prurl($ctx->{env}, $u));
+ $ret[0] .= qq(\n\t<a\nhref="$u">$u</a>);
}
}
- @ret; # may be empty
+ @ret; # may be empty, this sub is called as an arg for join()
}
sub code_footer ($) {
local %SIG = %SIG;
setup_signals();
- $ibx->umask_prepare;
$ibx->with_umask(\&_run, $ibx, $cb, $opt);
}
die "$new_dir exists\n" if -d $new_dir;
die "$old_dir not a directory\n" unless -d $old_dir;
-require Cwd;
-Cwd->import('abs_path');
+require PublicInbox::Admin;
require PublicInbox::Config;
require PublicInbox::InboxWritable;
-my $abs = abs_path($old_dir);
-die "failed to resolve $old_dir: $!\n" if (!defined($abs));
-
my $cfg = PublicInbox::Config->new;
-my $old;
-$cfg->each_inbox(sub {
- $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir;
-});
-if ($old) {
- $old = PublicInbox::InboxWritable->new($old);
-} else {
+my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg);
+@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n",
+ map { "\t$_->{inboxdir}\n" } @old;
+my $old = PublicInbox::InboxWritable->new($old[0]);
+if (delete $old->{-unconfigured}) {
warn "W: $old_dir not configured in " .
PublicInbox::Config::default_file() . "\n";
- $old = PublicInbox::InboxWritable->new({
- inboxdir => $old_dir,
- name => 'ignored',
- -primary_address => 'old@example.com',
- address => [ 'old@example.com' ],
- });
}
die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2;
-require File::Spec;
require PublicInbox::Admin;
my $detected = PublicInbox::Admin::detect_indexlevel($old);
$old->{indexlevel} //= $detected;
}
local %ENV = (%$env, %ENV) if $env;
my $new = { %$old };
-$new->{inboxdir} = File::Spec->canonpath($new_dir);
+$new->{inboxdir} = $cfg->rel2abs_collapsed($new_dir);
$new->{version} = 2;
$new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} });
$new->{-no_fsync} = 1 if !$opt->{fsync};
my $v2w;
-$old->umask_prepare;
sub link_or_copy ($$) {
my ($src, $dst) = @_;
# rename/relink $edit_fn
open my $new_fh, '<', $edit_fn or
die "can't read edited file ($edit_fn): $!\n";
- my $new_raw = do { local $/; <$new_fh> };
+ defined(my $new_raw = do { local $/; <$new_fh> }) or die
+ "read $edit_fn: $!\n";
if (!$opt->{raw}) {
# get rid of the From we added
use v5.10.1;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
-usage: public-inbox-extindex [options] EXTINDEX_DIR [INBOX_DIR]
+usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...]
Create and update external (detached) search indices
--no-fsync speed up indexing, risk corruption on power outage
+ --watch run persistently and watch for inbox updates
-L LEVEL `medium', or `full' (default: full)
--all index all configured inboxes
--jobs=NUM set or disable parallelization (NUM=0)
BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
See public-inbox-extindex(1) man page for full documentation.
EOF
-my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 };
+my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 };
GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
fsync|sync!
indexlevel|index-level|L=s max_size|max-size=s
batch_size|batch-size=s
- gc
+ gc commit-interval=i watch scan!
all help|h))
or die $help;
if ($opt->{help}) { print $help; exit 0 };
die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
-
-# require lazily to speed up --help
-my $eidx_dir = shift(@ARGV) // die "E: $help";
+require IO::Handle;
+STDOUT->autoflush(1);
+STDERR->autoflush(1);
local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync
+# require lazily to speed up --help
require PublicInbox::Admin;
my $cfg = PublicInbox::Config->new;
+my $eidx_dir = shift(@ARGV);
+unless (defined $eidx_dir) {
+ if ($opt->{all} && $cfg->ALL) {
+ $eidx_dir = $cfg->ALL->{topdir};
+ } else {
+ die "E: $help";
+ }
+}
my @ibxs;
if ($opt->{gc}) {
die "E: inbox paths must not be specified with --gc\n" if @ARGV;
- die "E: --all not compatible --gc\n" if $opt->{all};
+ die "E: --all not compatible with --gc\n" if $opt->{all};
+ die "E: --watch is not compatible with --gc\n" if $opt->{watch};
} else {
@ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
}
$eidx->attach_config($cfg);
$eidx->eidx_gc($opt);
} else {
- $eidx->attach_inbox($_) for @ibxs;
- $eidx->eidx_sync($opt);
+ if ($opt->{all}) {
+ $eidx->attach_config($cfg);
+ } else {
+ $eidx->attach_inbox($_) for @ibxs;
+ }
+ if ($opt->{watch}) {
+ $cfg = undef; # save memory only after SIGHUP
+ $eidx->eidx_watch($opt);
+ } else {
+ $eidx->eidx_sync($opt);
+ }
}
--no-fsync speed up indexing, risk corruption on power outage
-L LEVEL `basic', `medium', or `full' (default: full)
- -E EIDX update EIDX (e.g. `all')
+ -E EXTINDEX update extindex (default: `all')
--all index all configured inboxes
--compact | -c run public-inbox-compact(1) after indexing
--sequential-shard index Xapian shards sequentially for slow storage
BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes)
See public-inbox-index(1) man page for full documentation.
EOF
-my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 };
+my $opt = {
+ quiet => -1, compact => 0, max_size => undef, fsync => 1,
+ 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given
+};
GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
fsync|sync! xapian_only|xapian-only
indexlevel|index-level|L=s max_size|max-size=s
batch_size|batch-size=s
sequential_shard|seq-shard|sequential-shard
- skip-docdata all help|h))
+ no-update-extindex update-extindex|E=s@
+ fast-noop|F skip-docdata all help|h))
or die $help;
if ($opt->{help}) { print $help; exit 0 };
die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
if ($opt->{xapian_only} && !$opt->{reindex}) {
die "--xapian-only requires --reindex\n";
}
+if ($opt->{reindex} && delete($opt->{'fast-noop'})) {
+ warn "--fast-noop ignored with --reindex\n";
+}
# require lazily to speed up --help
require PublicInbox::Admin;
PublicInbox::Admin::require_or_die('-index');
unless (@ibxs) { print STDERR $help; exit 1 }
+my (@eidx, %eidx_seen);
+my $update_extindex = $opt->{'update-extindex'};
+if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) {
+ # extindex and normal inboxes may have different owners
+ push(@$update_extindex, 'all') if -w $ALL->{topdir};
+}
+@$update_extindex = () if $opt->{'no-update-extindex'};
+if (scalar @$update_extindex) {
+ PublicInbox::Admin::require_or_die('-search');
+ require PublicInbox::ExtSearchIdx;
+}
+for my $ei_name (@$update_extindex) {
+ my $es = $cfg->lookup_ei($ei_name);
+ my $topdir;
+ if (!$es && -d $ei_name) { # allow dirname or config section name
+ $topdir = $ei_name;
+ } elsif ($es) {
+ $topdir = $es->{topdir};
+ } else {
+ die "extindex `$ei_name' not configured or found\n";
+ }
+ my $o = { %$opt };
+ delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic';
+ $eidx_seen{$topdir} //=
+ push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o));
+}
my $mods = {};
+my @eidx_unconfigured;
foreach my $ibx (@ibxs) {
# detect_indexlevel may also set $ibx->{-skip_docdata}
my $detected = PublicInbox::Admin::detect_indexlevel($ibx);
$ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ?
'full' : $detected);
PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
+ if (@eidx && $ibx->{-unconfigured}) {
+ push @eidx_unconfigured, " $ibx->{inboxdir}\n";
+ }
}
+warn <<EOF if @eidx_unconfigured;
+The following inboxes are unconfigured and will not be updated in
+@$update_extindex:\n@eidx_unconfigured
+EOF
# "Search::Xapian" includes SWIG "Xapian", too:
$opt->{compact} = 0 if !$mods->{'Search::Xapian'};
EOL
$ibx_opt = { %$opt, sequential_shard => $v };
}
- PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
+ my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
last if $ibx_opt->{quit};
if (my $copt = $opt->{compact_opt}) {
local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
}
+ last if $ibx_opt->{quit};
+ next if $ibx->{-unconfigured} || !$nidx;
+ for my $eidx (@eidx) {
+ $eidx->attach_inbox($ibx);
+ }
+}
+my $pr = $opt->{-progress};
+for my $eidx (@eidx) {
+ $pr->("indexing $eidx->{topdir} ...\n") if $pr;
+ $eidx->eidx_sync($opt);
+ last if $opt->{quit};
}
defined $perm or die "(f)stat failed on $pi_config: $!\n";
chmod($perm & 07777, $fh) or
die "(f)chmod failed on future $pi_config: $!\n";
- my $old;
- {
- local $/;
- $old = <$oh>;
- }
+ defined(my $old = do { local $/; <$oh> }) or die "read $pi_config: $!\n";
print $fh $old or die "failed to write: $!\n";
close $oh or die "failed to close $pi_config: $!\n";
my $pfx = "publicinbox.$name";
my @x = (qw/git config/, "--file=$pi_config_tmp");
-require File::Spec;
-$inboxdir = File::Spec->canonpath($inboxdir);
+$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir);
+die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0;
-die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s;
if (-f "$inboxdir/inbox.lock") {
if (!defined $version) {
$version = 2;
$ibx->{-skip_docdata} = $skip_docdata;
}
$ibx->init_inbox(0, $skip_epoch, $skip_artnum);
-require Cwd;
-my $tmp = Cwd::abs_path($inboxdir);
-defined($tmp) or die "failed to resolve $inboxdir: $!\n";
-$inboxdir = $tmp;
-die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s;
# needed for git prior to v2.1.0
umask(0077) if defined $perm;
my $pi_cfg = PublicInbox::Config->new;
my $err;
my $mime = PublicInbox::Eml->new(do{
- local $/;
- my $data = <STDIN>;
+ defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
if ($train ne 'rm') {
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt);
PublicInbox::AdminEdit::check_editable(\@ibxs);
-my $data = do { local $/; <STDIN> };
+defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
my $n_purged = 0;
my ($res, $err, $v);
PublicInbox::Import::init_bare($git_dir);
-*resolve_inboxdir = do {
- no warnings 'once';
- *PublicInbox::Admin::resolve_inboxdir;
-};
+*resolve_inboxdir = \&PublicInbox::Admin::resolve_inboxdir;
# v1
is(resolve_inboxdir($git_dir), $git_dir, 'top-level GIT_DIR resolved');
ok(-e "$v2_dir/inbox.lock", 'exists');
is(resolve_inboxdir($v2_dir), $v2_dir,
'resolve_inboxdir works on v2_dir');
- ok(chdir($v2_dir), 'chdir v2_dir OK');
+ chdir($v2_dir) or BAIL_OUT "chdir v2_dir: $!";
is(resolve_inboxdir(), $v2_dir, 'resolve_inboxdir works inside v2_dir');
$res = resolve_inboxdir(undef, \$v);
is($v, 2, 'version 2 detected');
is($res, $v2_dir, 'detects directory along with version');
# TODO: should work from inside Xapian dirs, and git dirs, here...
+ PublicInbox::Import::init_bare("$v2_dir/git/0.git");
+ my $objdir = "$v2_dir/git/0.git/objects";
+ is($v2_dir, resolve_inboxdir($objdir, \$v), 'at $objdir');
+ is($v, 2, 'version 2 detected at $objdir');
+ chdir($objdir) or BAIL_OUT "chdir objdir: $!";
+ is(resolve_inboxdir(undef, \$v), $v2_dir, 'inside $objdir');
+ is($v, 2, 'version 2 detected inside $objdir');
}
-chdir '/';
+chdir '/' or BAIL_OUT "chdir: $!";
my @pairs = (
'1g' => 1024 ** 3,
}
SKIP: {
+ # XXX wildcard match requires git 2.26+
require_git('1.8.5', 2) or
skip 'git 1.8.5+ required for --url-match', 2;
my $f = "$tmpdir/urlmatch";
open my $fh, '>', $f or BAIL_OUT $!;
print $fh <<EOF or BAIL_OUT $!;
-[imap "imap://*.example.com"]
+[imap "imap://mail.example.com"]
pollInterval = 9
EOF
close $fh or BAIL_OUT;
pipe($x, $y) or die;
is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($r), EPOLLIN), 0, 'add EPOLLIN');
my $events = [];
-my $n = $p->epoll_wait(9, 0, $events);
+$p->epoll_wait(9, 0, $events);
is_deeply($events, [], 'no events set');
-is($n, 0, 'nothing ready, yet');
is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($w), EPOLLOUT|EPOLLONESHOT), 0,
'add EPOLLOUT|EPOLLONESHOT');
-$n = $p->epoll_wait(9, -1, $events);
-is($n, 1, 'got POLLOUT event');
-is($events->[0]->[0], fileno($w), '$w ready');
+$p->epoll_wait(9, -1, $events);
+is(scalar(@$events), 1, 'got POLLOUT event');
+is($events->[0], fileno($w), '$w ready');
-$n = $p->epoll_wait(9, 0, $events);
-is($n, 0, 'nothing ready after oneshot');
+$p->epoll_wait(9, 0, $events);
+is(scalar(@$events), 0, 'nothing ready after oneshot');
is_deeply($events, [], 'no events set after oneshot');
syswrite($w, '1') == 1 or die;
for my $t (0..1) {
- $n = $p->epoll_wait(9, $t, $events);
- is($events->[0]->[0], fileno($r), "level-trigger POLLIN ready #$t");
- is($n, 1, "only event ready #$t");
+ $p->epoll_wait(9, $t, $events);
+ is($events->[0], fileno($r), "level-trigger POLLIN ready #$t");
+ is(scalar(@$events), 1, "only event ready #$t");
}
syswrite($y, '1') == 1 or die;
is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($x), EPOLLIN|EPOLLONESHOT), 0,
'EPOLLIN|EPOLLONESHOT add');
-is($p->epoll_wait(9, -1, $events), 2, 'epoll_wait has 2 ready');
-my @fds = sort(map { $_->[0] } @$events);
+$p->epoll_wait(9, -1, $events);
+is(scalar @$events, 2, 'epoll_wait has 2 ready');
+my @fds = sort @$events;
my @exp = sort((fileno($r), fileno($x)));
is_deeply(\@fds, \@exp, 'got both ready FDs');
is($p->epoll_ctl(EPOLL_CTL_DEL, fileno($r), 0), 0, 'EPOLL_CTL_DEL OK');
-$n = $p->epoll_wait(9, 0, $events);
-is($n, 0, 'nothing ready after EPOLL_CTL_DEL');
+$p->epoll_wait(9, 0, $events);
+is(scalar @$events, 0, 'nothing ready after EPOLL_CTL_DEL');
done_testing;
'epoll_ctl socket EPOLLOUT');
my @events;
-is(epoll_wait($epfd, 100, 10000, \@events), 1, 'epoll_wait returns');
+epoll_wait($epfd, 100, 10000, \@events);
is(scalar(@events), 1, 'got one event');
-is($events[0]->[0], fileno($w), 'got expected FD');
-is($events[0]->[1], EPOLLOUT, 'got expected event');
+is($events[0], fileno($w), 'got expected FD');
close $w;
-is(epoll_wait($epfd, 100, 0, \@events), 0, 'epoll_wait timeout');
+epoll_wait($epfd, 100, 0, \@events);
+is(@events, 0, 'epoll_wait timeout');
done_testing;
is(length($$x), $size, 'read correct number of bytes');
my $ref = $gcf->qx(qw(cat-file blob), $buf);
+ is($?, 0, 'no error on scalar success');
my @ref = $gcf->qx(qw(cat-file blob), $buf);
+ is($?, 0, 'no error on wantarray success');
my $nl = scalar @ref;
ok($nl > 1, "qx returned array length of $nl");
+ is(join('', @ref), $ref, 'qx array and scalar context both work');
$gcf->qx(qw(repack -adq));
ok($gcf->packed_bytes > 0, 'packed size is positive');
+ $gcf->qx(qw(rev-parse --verify bogus));
+ isnt($?, 0, '$? set on failure'.$?);
}
SKIP: {
# ensure IDLE persists across HUP, w/o extra watches or FDs
$td->kill('HUP') or BAIL_OUT "failed to kill -imapd: $!";
- SKIP: {
- skip 'no inotify fdinfo (or support)', 2 if !@ino_info;
- my (@tmp, %prev);
- local $/ = "\n";
- my $end = time + 5;
- until (time > $end) {
- select undef, undef, undef, 0.01;
- open my $fh, '<', $ino_fdinfo or
- BAIL_OUT "$ino_fdinfo: $!";
- %prev = map { $_ => 1 } @ino_info;
- @tmp = grep(/^inotify wd:/, <$fh>);
- if (scalar(@tmp) == scalar(@ino_info)) {
- delete @prev{@tmp};
- last if scalar(keys(%prev)) == @ino_info;
- }
- }
- is(scalar @tmp, scalar @ino_info,
- 'old inotify watches replaced');
- is(scalar keys %prev, scalar @ino_info,
- 'no previous watches overlap');
- };
+ for my $n (1..2) { # kick the event loop so we know HUP is done
+ my $m = $imap_client->new(%mic_opt);
+ ok($m->login && $m->IsAuthenticated && $m->logout,
+ "connection $n works after HUP");
+ }
open($fh, '<', 't/data/0001.patch') or BAIL_OUT("open: $!");
run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or
like($smsg->{to}, qr/\blist\@example\.com\b/, 'to appears');
my $doc = $m->get_document;
my $col = PublicInbox::Search::BYTES();
- my $bytes = PublicInbox::SearchIdx::get_val($doc, $col);
+ my $bytes = PublicInbox::Search::int_val($doc, $col);
like($bytes, qr/\A[0-9]+\z/, '$bytes stored as digit');
ok($bytes > 0, '$bytes is > 0');
is($bytes, $smsg->{bytes}, 'bytes Xapian value matches Over');
$col = PublicInbox::Search::UID();
- my $uid = PublicInbox::SearchIdx::get_val($doc, $col);
+ my $uid = PublicInbox::Search::int_val($doc, $col);
is($uid, $smsg->{num}, 'UID column matches {num}');
is($uid, $m->get_docid, 'UID column matches docid');
}
--- /dev/null
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Eml;
+use File::Path qw(mkpath);
+use IO::Handle (); # autoflush
+use POSIX qw(_exit);
+use Cwd qw(getcwd abs_path);
+use File::Spec;
+my $many_root = $ENV{TEST_MANY_ROOT} or
+ plan skip_all => 'TEST_MANY_ROOT not defined';
+my $cwd = getcwd();
+mkpath($many_root);
+-d $many_root or BAIL_OUT "$many_root: $!";
+$many_root = abs_path($many_root);
+$many_root =~ m!\A\Q$cwd\E/! and BAIL_OUT "$many_root must not be in $cwd";
+require_git 2.6;
+require_mods(qw(DBD::SQLite Search::Xapian));
+use_ok 'PublicInbox::V2Writable';
+my $nr_inbox = $ENV{NR_INBOX} // 10;
+my $nproc = $ENV{NPROC} || PublicInbox::V2Writable::detect_nproc() || 2;
+my $indexlevel = $ENV{TEST_INDEXLEVEL} // 'basic';
+diag "NR_INBOX=$nr_inbox NPROC=$nproc TEST_INDEXLEVEL=$indexlevel";
+diag "TEST_MANY_ROOT=$many_root";
+my $level_cfg = $indexlevel eq 'full' ? '' : "\tindexlevel = $indexlevel\n";
+my $pfx = "$many_root/$nr_inbox-$indexlevel";
+mkpath($pfx);
+open my $cfg_fh, '>>', "$pfx/config" or BAIL_OUT $!;
+$cfg_fh->autoflush(1);
+my $v2_init_add = sub {
+ my ($i) = @_;
+ my $ibx = PublicInbox::Inbox->new({
+ inboxdir => "$pfx/test-$i",
+ name => "test-$i",
+ newsgroup => "inbox.comp.test.foo.test-$i",
+ address => [ "test-$i\@example.com" ],
+ url => [ "//example.com/test-$i" ],
+ version => 2,
+ });
+ $ibx->{indexlevel} = $indexlevel if $level_cfg ne '';
+ my $entry = <<EOF;
+[publicinbox "$ibx->{name}"]
+ address = $ibx->{-primary_address}
+ url = $ibx->{url}->[0]
+ newsgroup = $ibx->{newsgroup}
+ inboxdir = $ibx->{inboxdir}
+EOF
+ $entry .= $level_cfg;
+ print $cfg_fh $entry or die $!;
+ my $v2w = PublicInbox::V2Writable->new($ibx, { nproc => 0 });
+ $v2w->init_inbox(0);
+ $v2w->add(PublicInbox::Eml->new(<<EOM));
+Date: Sat, 02 Oct 2010 00:00:00 +0000
+From: Lorelei <l\@example.com>
+To: test-$i\@example.com
+Message-ID: <20101002-000000-$i\@example.com>
+Subject: hello world $i
+
+hi
+EOM
+ $v2w->done;
+};
+
+my @children;
+for my $i (1..$nproc) {
+ my ($r, $w);
+ pipe($r, $w) or BAIL_OUT $!;
+ my $pid = fork;
+ if ($pid == 0) {
+ close $w;
+ while (my $i = <$r>) {
+ chomp $i;
+ $v2_init_add->($i);
+ }
+ _exit(0);
+ }
+ defined $pid or BAIL_OUT "fork: $!";
+ close $r or BAIL_OUT $!;
+ push @children, [ $w, $pid ];
+ $w->autoflush(1);
+}
+
+for my $i (0..$nr_inbox) {
+ print { $children[$i % @children]->[0] } "$i\n" or BAIL_OUT $!;
+}
+
+for my $c (@children) {
+ close $c->[0] or BAIL_OUT "close $!";
+}
+my $i = 0;
+for my $c (@children) {
+ my $pid = waitpid($c->[1], 0);
+ is($?, 0, ++$i.' exited ok');
+}
+ok(close($cfg_fh), 'config written');
+done_testing;