Neither Inboxes nor ExtSearch objects were retrying correctly
when there are live git processes, but the inboxes were getting
rescanned for search or other reasons. Ensure the scan retries
eventually if there's live processes.
We also need to update the cleanup task to detect Xapian shard
count changes, since Xapian ->reopen is enough to detect any
other Xapian changes. Otherwise, we just issue an inexpensive
->reopen call and let Xapian check whether there's anything
worth reopening.
This also lets us eliminate the Devel::Peek dependency.
essential => [ qw(
git
perl
essential => [ qw(
git
perl
Digest::SHA
Encode
ExtUtils::MakeMaker
Digest::SHA
Encode
ExtUtils::MakeMaker
pkg => 'p5-TimeDate',
rpm => 'perl-TimeDate',
},
pkg => 'p5-TimeDate',
rpm => 'perl-TimeDate',
},
- 'Devel::Peek' => {
- deb => 'perl', # libperl5.XX, but the XX varies
- pkg => 'perl5',
- },
'Digest::SHA' => {
deb => 'perl', # libperl5.XX, but the XX varies
pkg => 'perl5',
'Digest::SHA' => {
deb => 'perl', # libperl5.XX, but the XX varies
pkg => 'perl5',
$opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
}
$opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
}
-# TODO: make Devel::Peek optional, only used for daemon
-my @base_mod = qw(Devel::Peek);
my @over_mod = qw(DBD::SQLite DBI);
my %mod_groups = (
-index => [ @base_mod, @over_mod ],
my @over_mod = qw(DBD::SQLite DBI);
my %mod_groups = (
-index => [ @base_mod, @over_mod ],
'$EXTINDEX_DIR/description missing';
}
'$EXTINDEX_DIR/description missing';
}
+sub search {
+ PublicInbox::Inbox::_cleanup_later($_[0]);
+ $_[0];
+}
+
no warnings 'once';
*base_url = \&PublicInbox::Inbox::base_url;
*smsg_eml = \&PublicInbox::Inbox::smsg_eml;
no warnings 'once';
*base_url = \&PublicInbox::Inbox::base_url;
*smsg_eml = \&PublicInbox::Inbox::smsg_eml;
*recent = \&PublicInbox::Inbox::recent;
*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
*recent = \&PublicInbox::Inbox::recent;
*max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
-*isrch = *search = \&PublicInbox::Search::reopen;
delete $self->{inflight_c};
_destroy($self, qw(cat_rbuf in out pid));
_destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
delete $self->{inflight_c};
_destroy($self, qw(cat_rbuf in out pid));
_destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
- !!($self->{pid} || $self->{pid_c});
+ defined($self->{pid}) || defined($self->{pid_c});
+# returns true if there are pending cat-file processes
sub cleanup_if_unlinked {
my ($self) = @_;
return cleanup($self) if $^O ne 'linux';
# Linux-specific /proc/$PID/maps access
# TODO: support this inside git.git
sub cleanup_if_unlinked {
my ($self) = @_;
return cleanup($self) if $^O ne 'linux';
# Linux-specific /proc/$PID/maps access
# TODO: support this inside git.git
for my $fld (qw(pid pid_c)) {
my $pid = $self->{$fld} // next;
for my $fld (qw(pid pid_c)) {
my $pid = $self->{$fld} // next;
- open my $fh, '<', "/proc/$pid/maps" or next;
+ open my $fh, '<', "/proc/$pid/maps" or return cleanup($self);
+ # n.b. we do not restart for unlinked multi-pack-index
+ # since it's not too huge, and the startup cost may
+ # be higher.
return cleanup($self) if /\.(?:idx|pack) \(deleted\)$/;
}
return cleanup($self) if /\.(?:idx|pack) \(deleted\)$/;
}
# admin will attempt to replace them atomically after compact/vacuum
# and we need to be prepared for that.
my $cleanup_timer;
# admin will attempt to replace them atomically after compact/vacuum
# and we need to be prepared for that.
my $cleanup_timer;
-my $cleanup_avail = -1; # 0, or 1
-my $have_devel_peek;
my $CLEANUP = {}; # string(inbox) -> inbox
sub git_cleanup ($) {
my ($self) = @_;
my $CLEANUP = {}; # string(inbox) -> inbox
sub git_cleanup ($) {
my ($self) = @_;
- if ($self->isa(__PACKAGE__)) {
- # normal Inbox; low startup cost, and likely to have many
- # many so this keeps process/pipe counts in check
- $self->{git}->cleanup if $self->{git};
- } else {
- # ExtSearch, high startup cost if ->ALL, and probably
- # only one per-daemon, so teardown only if required:
- $self->git->cleanup_if_unlinked;
- }
+ my $git = $self->{git} // return undef;
+ # normal inboxes have low startup cost and there may be many, so
+ # keep process+pipe counts in check. ExtSearch may have high startup
+ # cost (e.g. ->ALL) and but likely one per-daemon, so cleanup only
+ # if there's unlinked files
+ my $live = $self->isa(__PACKAGE__) ? $git->cleanup
+ : $git->cleanup_if_unlinked;
+ delete($self->{git}) unless $live;
+ $live;
+# returns true if further checking is required
+sub cleanup_shards { $_[0]->{search} ? $_[0]->{search}->cleanup_shards : undef }
+
sub cleanup_task () {
$cleanup_timer = undef;
my $next = {};
for my $ibx (values %$CLEANUP) {
sub cleanup_task () {
$cleanup_timer = undef;
my $next = {};
for my $ibx (values %$CLEANUP) {
- my $again;
- if ($have_devel_peek) {
- foreach my $f (qw(search)) {
- # we bump refcnt by assigning tmp, here:
- my $tmp = $ibx->{$f} or next;
- next if Devel::Peek::SvREFCNT($tmp) > 2;
- delete $ibx->{$f};
- # refcnt is zero when tmp is out-of-scope
- }
- }
- git_cleanup($ibx);
- if (my $gits = $ibx->{-repo_objs}) {
- foreach my $git (@$gits) {
- $again = 1 if $git->cleanup;
- }
+ my $again = git_cleanup($ibx);
+ $ibx->cleanup_shards and $again = 1;
+ for my $git (@{$ibx->{-repo_objs}}) {
+ $again = 1 if $git->cleanup;
- if ($have_devel_peek) {
- $again ||= !!$ibx->{search};
- }
$next->{"$ibx"} = $ibx if $again;
}
$CLEANUP = $next;
$next->{"$ibx"} = $ibx if $again;
}
$CLEANUP = $next;
+ $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
-sub cleanup_possible () {
+sub _cleanup_later ($) {
# no need to require DS, here, if it were enabled another
# module would've require'd it, already
# no need to require DS, here, if it were enabled another
# module would've require'd it, already
- eval { PublicInbox::DS::in_loop() } or return 0;
-
- eval {
- require Devel::Peek; # needs separate package in Fedora
- $have_devel_peek = 1;
- };
- 1;
-}
-
-sub _cleanup_later ($) {
- my ($self) = @_;
- $cleanup_avail = cleanup_possible() if $cleanup_avail < 0;
- return if $cleanup_avail != 1;
- $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
- $CLEANUP->{"$self"} = $self;
+ if (eval { PublicInbox::DS::in_loop() }) {
+ $cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
+ $CLEANUP->{"$_[0]"} = $_[0]; # $self
+ }
}
sub _set_limiter ($$$) {
}
sub _set_limiter ($$$) {
my (@xdb, $slow_phrase);
load_xapian();
$self->{qp_flags} //= $QP_FLAGS;
my (@xdb, $slow_phrase);
load_xapian();
$self->{qp_flags} //= $QP_FLAGS;
- if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+ if ($xpfx =~ m!/xapian[0-9]+\z!) {
@xdb = ($X{Database}->new($xpfx));
$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
} else {
@xdb = ($X{Database}->new($xpfx));
$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
} else {
+# returns true if a future rescan is desired
+sub cleanup_shards {
+ my ($self) = @_;
+ return unless exists($self->{xdb});
+ my $xpfx = $self->{xpfx};
+ return reopen($self) if $xpfx =~ m!/xapian[0-9]+\z!; # true
+ opendir(my $dh, $xpfx) or return warn("$xpfx gone: $!\n"); # true
+ my $nr = grep(/\A[0-9]+\z/, readdir($dh)) or
+ return warn("$xpfx has no shards\n"); # true
+ return reopen($self) if $nr == ($self->{nshard} // -1);
+ delete($self->{xdb});
+ undef;
+}
+
sub new {
my ($class, $ibx) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
sub new {
my ($class, $ibx) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
SKIP: {
my $d = "$home/extindex-j1";
SKIP: {
my $d = "$home/extindex-j1";
+ my $es = PublicInbox::ExtSearch->new($d);
+ ok(my $nresult0 = $es->mset('z:0..')->size, 'got results');
+ ok(ref($es->{xdb}), '{xdb} created');
+ my $nshards1 = $es->{nshard};
+ is($nshards1, 1, 'correct shard count');
+ my $xdb_str = "$es->{xdb}";
+ ok($es->cleanup_shards, 'cleanup_shards noop');
+ is("$es->{xdb}", $xdb_str, '{xdb} unchanged');
+
my $o = { 2 => \(my $err = '') };
ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4');
my @dirs = glob("$d/ei*/?");
for my $i (0..3) {
is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created");
}
my $o = { 2 => \(my $err = '') };
ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4');
my @dirs = glob("$d/ei*/?");
for my $i (0..3) {
is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created");
}
+ is($es->cleanup_shards, undef, 'cleanup_shards cleaned');
+ ok(!defined($es->{xdb}), 'old {xdb} gone');
+ is($es->cleanup_shards, undef, 'cleanup_shards clean idempotent');
+ is($es->mset('z:0..')->size, $nresult0, 'new shards, same results');
+ ok($es->cleanup_shards, 'cleanup_shards true after open');
+
for my $i (4..5) {
is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]");
}
for my $i (4..5) {
is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]");
}