essential => [ qw(
git
perl
- Devel::Peek
Digest::SHA
Encode
ExtUtils::MakeMaker
pkg => 'p5-TimeDate',
rpm => 'perl-TimeDate',
},
- 'Devel::Peek' => {
- deb => 'perl', # libperl5.XX, but the XX varies
- pkg => 'perl5',
- },
'Digest::SHA' => {
deb => 'perl', # libperl5.XX, but the XX varies
pkg => 'perl5',
$opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
}
-# TODO: make Devel::Peek optional, only used for daemon
-my @base_mod = qw(Devel::Peek);
+my @base_mod = ();
my @over_mod = qw(DBD::SQLite DBI);
my %mod_groups = (
-index => [ @base_mod, @over_mod ],
'$EXTINDEX_DIR/description missing';
}
+sub search {
+ PublicInbox::Inbox::_cleanup_later($_[0]);
+ $_[0];
+}
+
no warnings 'once';
*base_url = \&PublicInbox::Inbox::base_url;
*smsg_eml = \&PublicInbox::Inbox::smsg_eml;
*recent = \&PublicInbox::Inbox::recent;
*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
-*isrch = *search = \&PublicInbox::Search::reopen;
+*isrch = \&search;
1;
delete $self->{inflight_c};
_destroy($self, qw(cat_rbuf in out pid));
_destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
- !!($self->{pid} || $self->{pid_c});
+ defined($self->{pid}) || defined($self->{pid_c});
}
$ent;
}
+# returns true if there are pending cat-file processes
sub cleanup_if_unlinked {
my ($self) = @_;
return cleanup($self) if $^O ne 'linux';
# Linux-specific /proc/$PID/maps access
# TODO: support this inside git.git
+ my $ret = 0;
for my $fld (qw(pid pid_c)) {
my $pid = $self->{$fld} // next;
- open my $fh, '<', "/proc/$pid/maps" or next;
+ open my $fh, '<', "/proc/$pid/maps" or return cleanup($self);
while (<$fh>) {
+ # n.b. we do not restart for unlinked multi-pack-index
+ # since it's not too huge, and the startup cost may
+ # be higher.
return cleanup($self) if /\.(?:idx|pack) \(deleted\)$/;
}
+ ++$ret;
}
+ $ret;
}
1;
# admin will attempt to replace them atomically after compact/vacuum
# and we need to be prepared for that.
my $cleanup_timer;
-my $cleanup_avail = -1; # 0, or 1
-my $have_devel_peek;
my $CLEANUP = {}; # string(inbox) -> inbox
sub git_cleanup ($) {
my ($self) = @_;
- if ($self->isa(__PACKAGE__)) {
- # normal Inbox; low startup cost, and likely to have many
- # many so this keeps process/pipe counts in check
- $self->{git}->cleanup if $self->{git};
- } else {
- # ExtSearch, high startup cost if ->ALL, and probably
- # only one per-daemon, so teardown only if required:
- $self->git->cleanup_if_unlinked;
- }
+ my $git = $self->{git} // return undef;
+ # normal inboxes have low startup cost and there may be many, so
+ # keep process+pipe counts in check. ExtSearch may have high startup
+ # cost (e.g. ->ALL) and but likely one per-daemon, so cleanup only
+ # if there's unlinked files
+ my $live = $self->isa(__PACKAGE__) ? $git->cleanup
+ : $git->cleanup_if_unlinked;
+ delete($self->{git}) unless $live;
+ $live;
}
+# returns true if further checking is required
+sub cleanup_shards { $_[0]->{search} ? $_[0]->{search}->cleanup_shards : undef }
+
sub cleanup_task () {
$cleanup_timer = undef;
my $next = {};
for my $ibx (values %$CLEANUP) {
- my $again;
- if ($have_devel_peek) {
- foreach my $f (qw(search)) {
- # we bump refcnt by assigning tmp, here:
- my $tmp = $ibx->{$f} or next;
- next if Devel::Peek::SvREFCNT($tmp) > 2;
- delete $ibx->{$f};
- # refcnt is zero when tmp is out-of-scope
- }
- }
- git_cleanup($ibx);
- if (my $gits = $ibx->{-repo_objs}) {
- foreach my $git (@$gits) {
- $again = 1 if $git->cleanup;
- }
+ my $again = git_cleanup($ibx);
+ $ibx->cleanup_shards and $again = 1;
+ for my $git (@{$ibx->{-repo_objs}}) {
+ $again = 1 if $git->cleanup;
}
check_inodes($ibx);
- if ($have_devel_peek) {
- $again ||= !!$ibx->{search};
- }
$next->{"$ibx"} = $ibx if $again;
}
$CLEANUP = $next;
+ $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
}
-sub cleanup_possible () {
+sub _cleanup_later ($) {
# no need to require DS, here, if it were enabled another
# module would've require'd it, already
- eval { PublicInbox::DS::in_loop() } or return 0;
-
- eval {
- require Devel::Peek; # needs separate package in Fedora
- $have_devel_peek = 1;
- };
- 1;
-}
-
-sub _cleanup_later ($) {
- my ($self) = @_;
- $cleanup_avail = cleanup_possible() if $cleanup_avail < 0;
- return if $cleanup_avail != 1;
- $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
- $CLEANUP->{"$self"} = $self;
+ if (eval { PublicInbox::DS::in_loop() }) {
+ $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
+ $CLEANUP->{"$_[0]"} = $_[0]; # $self
+ }
}
sub _set_limiter ($$$) {
my (@xdb, $slow_phrase);
load_xapian();
$self->{qp_flags} //= $QP_FLAGS;
- if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+ if ($xpfx =~ m!/xapian[0-9]+\z!) {
@xdb = ($X{Database}->new($xpfx));
$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
} else {
};
}
+# returns true if a future rescan is desired
+sub cleanup_shards {
+ my ($self) = @_;
+ return unless exists($self->{xdb});
+ my $xpfx = $self->{xpfx};
+ return reopen($self) if $xpfx =~ m!/xapian[0-9]+\z!; # true
+ opendir(my $dh, $xpfx) or return warn("$xpfx gone: $!\n"); # true
+ my $nr = grep(/\A[0-9]+\z/, readdir($dh)) or
+ return warn("$xpfx has no shards\n"); # true
+ return reopen($self) if $nr == ($self->{nshard} // -1);
+ delete($self->{xdb});
+ undef;
+}
+
sub new {
my ($class, $ibx) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
SKIP: {
my $d = "$home/extindex-j1";
+ my $es = PublicInbox::ExtSearch->new($d);
+ ok(my $nresult0 = $es->mset('z:0..')->size, 'got results');
+ ok(ref($es->{xdb}), '{xdb} created');
+ my $nshards1 = $es->{nshard};
+ is($nshards1, 1, 'correct shard count');
+ my $xdb_str = "$es->{xdb}";
+ ok($es->cleanup_shards, 'cleanup_shards noop');
+ is("$es->{xdb}", $xdb_str, '{xdb} unchanged');
+
my $o = { 2 => \(my $err = '') };
ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4');
my @dirs = glob("$d/ei*/?");
for my $i (0..3) {
is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created");
}
+ is($es->cleanup_shards, undef, 'cleanup_shards cleaned');
+ ok(!defined($es->{xdb}), 'old {xdb} gone');
+ is($es->cleanup_shards, undef, 'cleanup_shards clean idempotent');
+ is($es->mset('z:0..')->size, $nresult0, 'new shards, same results');
+ ok($es->cleanup_shards, 'cleanup_shards true after open');
+
for my $i (4..5) {
is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]");
}