use PublicInbox::Spawn qw(spawn nodatacow_dir);
use PublicInbox::Git qw(git_unquote);
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack);
+our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack
+ index_text term_generator add_val);
my $X = \%PublicInbox::Search::X;
-my ($DB_CREATE_OR_OPEN, $DB_OPEN);
+our ($DB_CREATE_OR_OPEN, $DB_OPEN);
our $DB_NO_SYNC = 0;
our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : 1_000_000;
use constant DEBUG => !!$ENV{DEBUG};
$self->{term_generator} //= do {
my $tg = $X->{TermGenerator}->new;
- $tg->set_stemmer($self->stemmer);
+ $tg->set_stemmer(PublicInbox::Search::stemmer($self));
$tg;
}
}
my $doc = _get_doc($self, $docid, $oid) or return;
eval { $doc->remove_term('O'.$eidx_key) };
warn "W: ->remove_term O$eidx_key: $@\n" if $@;
- for my $l ($eml->header_raw('List-Id')) {
+ for my $l ($eml ? $eml->header_raw('List-Id') : ()) {
$l =~ /<([^>]+)>/ or next;
my $lid = lc $1;
eval { $doc->remove_term('G' . $lid) };
my ($self, $opt) = @_;
delete $self->{lock_path} if $opt->{-skip_lock};
$self->with_umask(\&_index_sync, $self, $opt);
- if ($opt->{reindex}) {
+ if ($opt->{reindex} && !$opt->{quit}) {
my %again = %$opt;
delete @again{qw(rethread reindex)};
index_sync($self, \%again);
+ $opt->{quit} = $again{quit}; # propagate to caller
}
}
if (my $pr = $sync->{-opt}->{-progress}) {
$pr->("indexed $nr/$sync->{ntodo}\n") if $nr;
}
- if (!$stk) { # more to come
+ if (!$stk && !$sync->{quit}) { # more to come
begin_txn_lazy($self);
$self->{mm}->{dbh}->begin_work;
}
if (my @leftovers = keys %{delete($sync->{D}) // {}}) {
warn('W: unindexing '.scalar(@leftovers)." leftovers\n");
for my $oid (@leftovers) {
+ last if $sync->{quit};
$oid = unpack('H*', $oid);
$git->cat_async($oid, \&unindex_both, $sync);
}
}
while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) {
my $arg = { %$sync, cur_cmt => $cur_cmt };
+ last if $sync->{quit};
if ($f eq 'm') {
$arg->{autime} = $at;
$arg->{cotime} = $ct;
$git->cat_async($oid, \&unindex_both, $arg);
}
}
- v1_checkpoint($self, $sync, $stk);
+ v1_checkpoint($self, $sync, $sync->{quit} ? undef : $stk);
}
sub log2stack ($$$) {
ref($reindex) eq 'HASH' ? $reindex->{from} : '';
}
+# Build a handler which requests a graceful stop of an indexing run.
+# Takes the per-run $sync hashref and returns a coderef; nothing in
+# $sync is touched until that coderef is actually invoked.
+sub quit_cb ($) {
+	my ($sync) = @_;
+	return sub {
+		# {-opt}->{quit} is flagged as well as {quit}, so that
+		# ->index_sync callers looping over several inboxes can
+		# notice the request and abort their loop, too
+		$sync->{-opt}->{quit} = 1;
+		$sync->{quit} = 1;
+		warn "gracefully quitting\n";
+	};
+}
+
# indexes all unindexed messages (v1 only)
sub _index_sync {
my ($self, $opt) = @_;
$ibx->git->batch_prepare;
my $pr = $opt->{-progress};
my $sync = { reindex => $opt->{reindex}, -opt => $opt, ibx => $ibx };
+ my $quit = quit_cb($sync);
+ local $SIG{QUIT} = $quit;
+ local $SIG{INT} = $quit;
+ local $SIG{TERM} = $quit;
my $xdb = $self->begin_txn_lazy;
$self->{oidx}->rethread_prepare($opt);
my $mm = _msgmap_init($self);
my $stk = prepare_stack($sync, $range);
$sync->{ntodo} = $stk ? $stk->num_records : 0;
$pr->("$sync->{ntodo}\n") if $pr; # continue previous line
- process_stack($self, $sync, $stk);
+ process_stack($self, $sync, $stk) if !$sync->{quit};
}
sub DESTROY {
$self;
}
+# Drop stale Xapian documents, treating $over as the canonical record:
+# every docid found in the Xapian DB is looked up in the `over' table
+# by `num', and documents without a matching row are deleted (with an
+# informational warning naming the Xapian directory and docid).
+# $over must provide ->dbh (presumably a DBI handle -- TODO confirm).
+sub over_check {
+	my ($self, $over) = @_;
+	begin_txn_lazy($self);
+	my $count_sth = $over->dbh->prepare(
+		"SELECT COUNT(*) FROM over WHERE num = ?\n");
+	my $xdb = $self->{xdb};
+	# NOTE(review): the empty term presumably walks every document
+	# in the DB -- confirm against the Xapian postlist documentation
+	my $it = $xdb->postlist_begin('');
+	my $stop = $xdb->postlist_end('');
+	my $xdir = $self->xdir;
+	while ($it != $stop) {
+		my $docid = $it->get_docid;
+		$count_sth->execute($docid);
+		# scalar context on purpose: the query selects one column
+		my $nr = $count_sth->fetchrow_array;
+		unless ($nr > 0) {
+			warn "I: removing $xdir #$docid, not in `over'\n";
+			$xdb->delete_document($docid);
+		}
+	} continue {
+		$it++; # advance here so every iteration moves forward
+	}
+}
+
1;