# is the message visible in the inbox currently being indexed?
# return the number if so
-sub cur_ibx_xnum ($$) {
- my ($req, $bref) = @_;
+sub cur_ibx_xnum ($$;$) {
+ my ($req, $bref, $mismatch) = @_;
my $ibx = $req->{ibx} or die 'BUG: current {ibx} missing';
$req->{eml} = PublicInbox::Eml->new($bref);
my ($id, $prev);
while (my $x = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
return $x->{num} if $x->{blob} eq $req->{oid};
+ push @$mismatch, $x if $mismatch;
}
}
undef;
blob => $oid,
}, 'PublicInbox::Smsg';
$new_smsg->set_bytes($$bref, $size);
- defined($req->{xnum} = cur_ibx_xnum($req, $bref)) or return;
++${$req->{nr}};
+ my $mismatch = [];
+ $req->{xnum} = cur_ibx_xnum($req, $bref, $mismatch) // do {
+ warn "# deleted\n";
+ warn "# mismatch $_->{blob}\n" for @$mismatch;
+ ${$req->{latest_cmt}} = $req->{cur_cmt} //
+ die "BUG: {cur_cmt} unset ($oid)\n";
+ return;
+ };
do_step($req);
}
return $locked if $locked eq $cur;
}
my ($pid, $time, $euid, $ident) = split(/-/, $cur, 4);
- my $t = strftime('%Y-%m-%d %k:%M:%S', gmtime($time));
+ my $t = strftime('%Y-%m-%d %k:%M %z', localtime($time));
+ local $self->{current_info} = 'eidxq';
if ($euid == $> && $ident eq host_ident) {
if (kill(0, $pid)) {
warn <<EOM; return;
-I: PID:$pid (re)indexing Xapian since $t, it will continue our work
+I: PID:$pid (re)indexing since $t, it will continue our work
EOM
}
if ($!{ESRCH}) {
$r = $sync->{self}->{oidx}->dbh->selectall_arrayref(
<<EOS, undef, $ibx->{-ibx_id});
SELECT docid,xnum,oidbin FROM xref3
-WHERE ibx_id = ? AND xnum $lt_or_gt LIMIT $lim
+WHERE ibx_id = ? AND $lt_or_gt LIMIT $lim
EOS
return if $sync->{quit};
for (@$r) { # hopefully rare, not worth optimizing:
my $slice = 10000;
my $opt = { limit => $slice };
my ($beg, $end) = (1, $slice);
- my $err = sync_inbox($self, $sync, $ibx) and return;
- my $max = $ibx->over->max;
+ my $ekey = $ibx->eidx_key;
+ my ($max, $max0);
+ do {
+ $max0 = $ibx->mm->num_highwater;
+ sync_inbox($self, $sync, $ibx) and return; # warned
+ $max = $ibx->mm->num_highwater;
+ return if $sync->{quit};
+ } while ($max > $max0 &&
+ warn("# $ekey moved $max0..$max, resyncing..\n"));
$end = $max if $end > $max;
# first, check if we missed any messages in target $ibx
my $msgs;
my $pr = $sync->{-opt}->{-progress};
- my $ekey = $ibx->eidx_key;
local $sync->{-regen_fmt} = "$ekey checking %u/$max\n";
${$sync->{nr}} = 0;
my $fast = $sync->{-opt}->{fast};
- my $dsu; # _unref_stale_range (< $lo) called
+ my $usr; # _unref_stale_range (< $lo) called
my ($lo, $hi);
while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end, $opt)})) {
${$sync->{nr}} = $beg;
reindex_checkpoint($self, $sync); # release lock
}
($lo, $hi) = ($msgs->[0]->{num}, $msgs->[-1]->{num});
- $dsu //= _unref_stale_range($sync, $ibx, "< $lo");
+ $usr //= _unref_stale_range($sync, $ibx, "xnum < $lo");
my $x3a = $self->{oidx}->dbh->selectall_arrayref(
<<"", undef, $ibx_id, $lo, $hi);
SELECT xnum,oidbin,docid FROM xref3 WHERE
for my $num (@$docids) {
$self->{oidx}->eidxq_add($num);
}
- return if $sync->{quit};
}
+ return if $sync->{quit};
}
- return if $sync->{quit};
next unless scalar keys %x3m;
$self->git->async_wait_all; # wait for reindex_unseen
my ($xnum, $hex) = unpack('JH*', $k);
my $bin = pack('H*', $hex);
my $exp = $mismatch{$xnum};
+ if (defined $exp) {
+ my $smsg = $ibx->over->get_art($xnum) // next;
+ # $xnum may be expired by another process
+ if ($smsg->{blob} eq $hex) {
+ warn <<"";
+BUG: (non-fatal) $ekey #$xnum $smsg->{blob} still matches (old exp: $exp)
+
+ next;
+ } # else: continue to unref
+ }
my $m = defined($exp) ? "mismatch (!= $exp)" : 'stale';
warn("# $xnum:$hex (#@$docids): $m\n");
for my $i (@$docids) {
_unref_doc($sync, $i, $ibx, $xnum, $bin);
}
+ return if $sync->{quit};
}
}
- _unref_stale_range($sync, $ibx, "> $hi") if defined($hi);
+ defined($hi) and ($hi < $max) and
+ _unref_stale_range($sync, $ibx, "xnum > $hi AND xnum <= $max");
}
sub _reindex_inbox ($$$) {