summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: 14e6064)
We'll count the number of log changes (regardless of index or
unindex) and only attach inboxes to ExtSearchIdx objects when
they get new work. We'll also reduce lock bouncing and only
update external indices after all per-inbox indexing is done.
This also updates existing v2 indexing/unindexing callers
to be more consistent and ensures unindex log entries update
per-inbox last commit information.
$idx = PublicInbox::SearchIdx->new($ibx, 1);
}
$idx->index_sync($opt);
$idx = PublicInbox::SearchIdx->new($ibx, 1);
}
$idx->index_sync($opt);
+ $idx->{nidx} // 0; # returns number processed
}
sub progress_prepare ($) {
}
sub progress_prepare ($) {
$smsg->{num} = index_mm($self, $eml, $oid, $sync) or
die "E: could not generate NNTP article number for $oid";
add_message($self, $eml, $smsg, $sync);
$smsg->{num} = index_mm($self, $eml, $oid, $sync) or
die "E: could not generate NNTP article number for $oid";
add_message($self, $eml, $smsg, $sync);
my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
${$sync->{latest_cmt}} = $cur_cmt;
}
my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
${$sync->{latest_cmt}} = $cur_cmt;
}
if (defined(my $cur_cmt = $sync->{cur_cmt})) {
${$sync->{latest_cmt}} = $cur_cmt;
}
if (defined(my $cur_cmt = $sync->{cur_cmt})) {
${$sync->{latest_cmt}} = $cur_cmt;
}
$mm_tmp->atfork_parent if $mm_tmp;
}
$mm_tmp->atfork_parent if $mm_tmp;
}
+sub index_finalize ($$) { # common tail for the index_oid and unindex_oid cat_async callbacks
+ my ($arg, $index) = @_; # $index: callers pass 1 from index_oid, 0 from unindex_oid
+ ++$arg->{self}->{nidx}; # bumped on every call, including the early returns for bad blobs
+ if (defined(my $cur = $arg->{cur_cmt})) {
+ ${$arg->{latest_cmt}} = $cur; # store the current commit through the {latest_cmt} scalar ref
+ } elsif ($index) {
+ die 'BUG: {cur_cmt} missing'; # only fatal on the indexing path
+ } # else { unindexing @leftovers doesn't set {cur_cmt}
+}
+
sub index_oid { # cat_async callback
my ($bref, $oid, $type, $size, $arg) = @_;
sub index_oid { # cat_async callback
my ($bref, $oid, $type, $size, $arg) = @_;
- return if is_bad_blob($oid, $type, $size, $arg->{oid});
+ is_bad_blob($oid, $type, $size, $arg->{oid}) and
+ return index_finalize($arg, 1); # size == 0 purged returns here
my $self = $arg->{self};
local $self->{current_info} = "$self->{current_info} $oid";
my $self = $arg->{self};
local $self->{current_info} = "$self->{current_info} $oid";
- return if $size == 0; # purged
my ($num, $mid0);
my $eml = PublicInbox::Eml->new($$bref);
my $mids = mids($eml);
my ($num, $mid0);
my $eml = PublicInbox::Eml->new($$bref);
my $mids = mids($eml);
if (do_idx($self, $bref, $eml, $smsg)) {
${$arg->{need_checkpoint}} = 1;
}
if (do_idx($self, $bref, $eml, $smsg)) {
${$arg->{need_checkpoint}} = 1;
}
- ${$arg->{latest_cmt}} = $arg->{cur_cmt} // die 'BUG: {cur_cmt} missing';
+ index_finalize($arg, 1);
}
# only update last_commit for $i on reindex iff newer than current
}
# only update last_commit for $i on reindex iff newer than current
}
sub unindex_oid ($$;$) { # git->cat_async callback
}
sub unindex_oid ($$;$) { # git->cat_async callback
- my ($bref, $oid, $type, $size, $sync) = @_;
- return if is_bad_blob($oid, $type, $size, $sync->{oid});
- my $self = $sync->{self};
+ my ($bref, $oid, $type, $size, $arg) = @_;
+ is_bad_blob($oid, $type, $size, $arg->{oid}) and
+ return index_finalize($arg, 0);
+ my $self = $arg->{self};
local $self->{current_info} = "$self->{current_info} $oid";
local $self->{current_info} = "$self->{current_info} $oid";
- my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
+ my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef;
my $mm = $self->{mm};
my $mids = mids(PublicInbox::Eml->new($bref));
undef $$bref;
my $mm = $self->{mm};
my $mids = mids(PublicInbox::Eml->new($bref));
undef $$bref;
}
unindex_oid_aux($self, $oid, $mid);
}
}
unindex_oid_aux($self, $oid, $mid);
}
+ index_finalize($arg, 0);
}
sub git { $_[0]->{ibx}->git }
}
sub git { $_[0]->{ibx}->git }
PublicInbox::Admin::require_or_die('-index');
unless (@ibxs) { print STDERR $help; exit 1 }
PublicInbox::Admin::require_or_die('-index');
unless (@ibxs) { print STDERR $help; exit 1 }
-my (@eidx_dir, %eidx_seen);
my $update_extindex = $opt->{'update-extindex'};
if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) {
# extindex and normal inboxes may have different owners
my $update_extindex = $opt->{'update-extindex'};
if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) {
# extindex and normal inboxes may have different owners
} else {
die "extindex `$ei_name' not configured or found\n";
}
} else {
die "extindex `$ei_name' not configured or found\n";
}
- $eidx_seen{$topdir} //= push(@eidx_dir, $topdir);
+ $eidx_seen{$topdir} //=
+ push(@eidx, PublicInbox::ExtSearchIdx->new($topdir));
}
my $mods = {};
my @eidx_unconfigured;
}
my $mods = {};
my @eidx_unconfigured;
$ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ?
'full' : $detected);
PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
$ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ?
'full' : $detected);
PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
- if (@eidx_dir && $ibx->{-unconfigured}) {
+ if (@eidx && $ibx->{-unconfigured}) {
push @eidx_unconfigured, " $ibx->{inboxdir}\n";
}
}
push @eidx_unconfigured, " $ibx->{inboxdir}\n";
}
}
EOL
$ibx_opt = { %$opt, sequential_shard => $v };
}
EOL
$ibx_opt = { %$opt, sequential_shard => $v };
}
- PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
+ my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
last if $ibx_opt->{quit};
if (my $copt = $opt->{compact_opt}) {
local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
}
last if $ibx_opt->{quit};
if (my $copt = $opt->{compact_opt}) {
local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
}
- next if $ibx->{-unconfigured};
last if $ibx_opt->{quit};
last if $ibx_opt->{quit};
- for my $dir (@eidx_dir) {
- my $eidx = PublicInbox::ExtSearchIdx->new($dir);
+ next if $ibx->{-unconfigured} || !$nidx;
+ for my $eidx (@eidx) {
$eidx->attach_inbox($ibx);
$eidx->attach_inbox($ibx);
- $eidx->eidx_sync($ibx_opt);
- last if $ibx_opt->{quit};
+$opt->{-no_fsync} = 1 if !$opt->{fsync};
+my $pr = $opt->{-progress};
+for my $eidx (@eidx) {
+ $pr->("indexing $eidx->{topdir} ...\n") if $pr;
+ $eidx->eidx_sync($opt);
+ last if $opt->{quit};
+}