-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Detached/external index cross inbox search indexing support
use Sys::Hostname qw(hostname);
use POSIX qw(strftime);
use PublicInbox::Search;
-use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor
- is_bad_blob);
+use PublicInbox::SearchIdx qw(prepare_stack is_ancestor is_bad_blob);
use PublicInbox::OverIdx;
use PublicInbox::MiscIdx;
use PublicInbox::MID qw(mids);
my ($req) = @_;
my $self = $req->{self};
my $new_smsg = $req->{new_smsg};
-
- # {raw_bytes} may be unset, so just use {bytes}
my $n = $self->{transact_bytes} += $new_smsg->{bytes};
# set flag for PublicInbox::V2Writable::index_todo:
if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message
my $xnum = $req->{xnum};
$self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key);
- $idx->shard_add_eidx_info($docid, $eidx_key, $eml);
+ $idx->ipc_do('add_eidx_info', $docid, $eidx_key, $eml);
check_batch_limit($req);
} else { # 'd'
my $rm_eidx_info;
\$rm_eidx_info);
if ($nr == 0) {
$self->{oidx}->eidxq_del($docid);
- $idx->shard_remove($docid);
+ $idx->ipc_do('xdb_remove', $docid);
} elsif ($rm_eidx_info) {
- $idx->shard_remove_eidx_info($docid, $eidx_key, $eml);
+ $idx->ipc_do('remove_eidx_info',
+ $docid, $eidx_key, $eml);
$self->{oidx}->eidxq_add($docid); # yes, add
}
}
my $oid = $new_smsg->{blob};
my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset';
$self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key);
- $idx->index_raw(undef, $eml, $new_smsg, $ibx->eidx_key);
+ $idx->index_eml($eml, $new_smsg, $ibx->eidx_key);
check_batch_limit($req);
}
my $new_smsg = $req->{new_smsg} = bless {
blob => $oid,
}, 'PublicInbox::Smsg';
- $new_smsg->{bytes} = $size + crlf_adjust($$bref);
+ $new_smsg->set_bytes($$bref, $size);
defined($req->{xnum} = cur_ibx_xnum($req, $bref)) or return;
++${$req->{nr}};
do_step($req);
}
} else {
warn "I: remove #$docid $eidx_key @oid\n";
- $self->idx_shard($docid)->shard_remove($docid);
+ $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
}
}
my $top_smsg = pop @$stable;
$top_smsg == $smsg or die 'BUG: top_smsg != smsg';
my $ibx = _ibx_for($self, $sync, $smsg);
- $idx->index_raw(undef, $eml, $smsg, $ibx->eidx_key);
+ $idx->index_eml($eml, $smsg, $ibx->eidx_key);
for my $x (reverse @$stable) {
$ibx = _ibx_for($self, $sync, $x);
my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}';
- $idx->shard_add_eidx_info($docid, $ibx->eidx_key, $hdr);
+ $idx->ipc_do('add_eidx_info', $docid, $ibx->eidx_key, $hdr);
}
return if $nr == 1; # likely, all good
my $remain = $self->{oidx}->remove_xref3($docid, $expect_oid);
if ($remain == 0) {
warn "W: #$docid gone or corrupted\n";
- $self->idx_shard($docid)->shard_remove($docid);
+ $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
} elsif (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) {
$self->git->cat_async($next_oid, \&_reindex_oid, $req);
} else {
warn "BUG: #$docid gone (UNEXPECTED)\n";
- $self->idx_shard($docid)->shard_remove($docid);
+ $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
}
return;
}
my $ci = $self->{current_info};
local $self->{current_info} = "$ci #$docid $oid";
my $re_smsg = bless { blob => $oid }, 'PublicInbox::Smsg';
- $re_smsg->{bytes} = $size + crlf_adjust($$bref);
+ $re_smsg->set_bytes($$bref, $size);
my $eml = PublicInbox::Eml->new($bref);
$re_smsg->populate($eml, { autime => $orig_smsg->{ds},
cotime => $orig_smsg->{ts} });
BUG? #$docid $smsg->{blob} is not referenced by inboxes during reindex
$self->{oidx}->delete_by_num($docid);
- $self->idx_shard($docid)->shard_remove($docid);
+ $self->idx_shard($docid)->ipc_do('xdb_remove', $docid);
return;
}
my $dbh = $self->{oidx}->dbh;
my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return;
${$sync->{nr}} = 0;
- $sync->{-regen_fmt} = "%u/$tot\n";
+ local $sync->{-regen_fmt} = "%u/$tot\n";
my $pr = $sync->{-opt}->{-progress};
if ($pr) {
my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq');
my $self = $req->{self} // die 'BUG: {self} unset';
local $self->{current_info} = "$self->{current_info} $oid";
my $new_smsg = bless { blob => $oid, }, 'PublicInbox::Smsg';
- $new_smsg->{bytes} = $size + crlf_adjust($$bref);
+ $new_smsg->set_bytes($$bref, $size);
my $eml = $req->{eml} = PublicInbox::Eml->new($bref);
$req->{new_smsg} = $new_smsg;
$req->{chash} = content_hash($eml);
my $msgs;
my $pr = $sync->{-opt}->{-progress};
my $ekey = $ibx->eidx_key;
- $sync->{-regen_fmt} = "$ekey checking unseen %u/".$ibx->over->max."\n";
+ local $sync->{-regen_fmt} =
+ "$ekey checking unseen %u/".$ibx->over->max."\n";
${$sync->{nr}} = 0;
while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) {
my $pr = $sync->{-opt}->{-progress};
my $fetching;
my $ekey = $ibx->eidx_key;
- $sync->{-regen_fmt} =
+ local $sync->{-regen_fmt} =
"$ekey check stale/missing %u/".$ibx->over->max."\n";
${$sync->{nr}} = 0;
do {
if (scalar(@$xr3) == 0) { # all gone
$self->{oidx}->delete_by_num($docid);
$self->{oidx}->eidxq_del($docid);
- $idx->shard_remove($docid);
+ $idx->ipc_do('xdb_remove', $docid);
} else { # enqueue for reindex of remaining messages
- $idx->shard_remove_eidx_info($docid,
- $ibx->eidx_key);
+ $idx->ipc_do('remove_eidx_info',
+ $docid, $ibx->eidx_key);
$self->{oidx}->eidxq_add($docid); # yes, add
}
}
eidxq_process($self, $sync) unless $sync->{quit};
}
+sub sync_inbox {
+ my ($self, $sync, $ibx) = @_;
+ my $err = _sync_inbox($self, $sync, $ibx);
+ delete @$ibx{qw(mm over)};
+ warn $err, "\n" if defined($err);
+}
+
sub eidx_sync { # main entry point
my ($self, $opt) = @_;
$ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key);
}
if (delete($opt->{reindex})) {
- $sync->{checkpoint_unlocks} = 1;
+ local $sync->{checkpoint_unlocks} = 1;
eidx_reindex($self, $sync);
}
# don't use $_ here, it'll get clobbered by reindex_checkpoint
- for my $ibx (@{$self->{ibx_list}}) {
- last if $sync->{quit};
- my $err = _sync_inbox($self, $sync, $ibx);
- delete @$ibx{qw(mm over)};
- warn $err, "\n" if defined($err);
+ if ($opt->{scan} // 1) {
+ for my $ibx (@{$self->{ibx_list}}) {
+ last if $sync->{quit};
+ sync_inbox($self, $sync, $ibx);
+ }
}
$self->{oidx}->rethread_done($opt) unless $sync->{quit};
eidxq_process($self, $sync) unless $sync->{quit};
eidxq_release($self);
- PublicInbox::V2Writable::done($self);
+ done($self);
+ $sync; # for eidx_watch
}
sub update_last_commit { # overrides V2Writable
return if $self->{idx_shards};
$self->git->cleanup;
-
+ my $mode = 0644;
my $ALL = $self->git->{git_dir}; # ALL.git
- PublicInbox::Import::init_bare($ALL) unless -d $ALL;
+ my $old = -d $ALL;
+ if ($opt->{-private}) { # LeiStore
+ $mode = 0600;
+ if (!$old) {
+ umask 077; # don't bother restoring
+ PublicInbox::Import::init_bare($ALL);
+ $self->git->qx(qw(config core.sharedRepository 0600));
+ }
+ } else {
+ PublicInbox::Import::init_bare($ALL) unless $old;
+ }
my $info_dir = "$ALL/objects/info";
my $alt = "$info_dir/alternates";
- my $mode = 0644;
my (@old, @new, %seen); # seen: st_dev + st_ino
if (-e $alt) {
open(my $fh, '<', $alt) or die "open $alt: $!";
$mode = (stat($fh))[2] & 07777;
while (my $line = <$fh>) {
chomp(my $d = $line);
+
+ # expand relative path (/local/ stuff)
+ substr($d, 0, 3) eq '../' and
+ $d = "$ALL/objects/$d";
if (my @st = stat($d)) {
next if $seen{"$st[0]\0$st[1]"}++;
} else {
push @old, $line;
}
}
+
+ # for LeiStore, and possibly some mirror-only state
+ if (opendir(my $dh, my $local = "$self->{topdir}/local")) {
+ # highest numbered epoch first
+ for my $n (sort { $b <=> $a } map { substr($_, 0, -4) + 0 }
+ grep(/\A[0-9]+\.git\z/, readdir($dh))) {
+ my $d = "$local/$n.git/objects"; # absolute path
+ if (my @st = stat($d)) {
+ next if $seen{"$st[0]\0$st[1]"}++;
+ # favor relative paths for rename-friendliness
+ push @new, "../../local/$n.git/objects\n";
+ } else {
+ warn "W: stat($d) failed: $!\n";
+ }
+ }
+ }
for my $ibx (@{$self->{ibx_list}}) {
my $line = $ibx->git->{git_dir} . "/objects\n";
chomp(my $d = $line);
$self->with_umask(\&_idx_init, $self, $opt);
$self->{oidx}->begin_lazy;
$self->{oidx}->eidx_prep;
+ $self->git->batch_prepare;
$self->{midx}->begin_txn;
}
+sub _watch_commit { # PublicInbox::DS::add_timer callback
+ my ($self) = @_;
+ delete $self->{-commit_timer};
+ eidxq_process($self, $self->{-watch_sync});
+ eidxq_release($self);
+ delete local $self->{-watch_sync}->{-regen_fmt};
+ reindex_checkpoint($self, $self->{-watch_sync});
+
+ # call event_step => done unless commit_timer is armed
+ PublicInbox::DS::requeue($self);
+}
+
+sub on_inbox_unlock { # called by PublicInbox::InboxIdle
+ my ($self, $ibx) = @_;
+ my $opt = $self->{-watch_sync}->{-opt};
+ my $pr = $opt->{-progress};
+ my $ekey = $ibx->eidx_key;
+ local $0 = "sync $ekey";
+ $pr->("indexing $ekey\n") if $pr;
+ $self->idx_init($opt);
+ sync_inbox($self, $self->{-watch_sync}, $ibx);
+ $self->{-commit_timer} //= PublicInbox::DS::add_timer(
+ $opt->{'commit-interval'} // 10,
+ \&_watch_commit, $self);
+}
+
+sub eidx_reload { # -extindex --watch SIGHUP handler
+ my ($self, $idler) = @_;
+ if ($self->{cfg}) {
+ my $pr = $self->{-watch_sync}->{-opt}->{-progress};
+ $pr->('reloading ...') if $pr;
+ delete $self->{-resync_queue};
+ @{$self->{ibx_list}} = ();
+ %{$self->{ibx_map}} = ();
+ delete $self->{-watch_sync}->{id2pos};
+ my $cfg = PublicInbox::Config->new;
+ attach_config($self, $cfg);
+ $idler->refresh($cfg);
+ $pr->(" done\n") if $pr;
+ } else {
+ warn "reload not supported without --all\n";
+ }
+}
+
+sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler
+ my ($self) = @_;
+ $self->{-resync_queue} //= [ @{$self->{ibx_list}} ];
+ PublicInbox::DS::requeue($self); # trigger our ->event_step
+}
+
+sub event_step { # PublicInbox::DS::requeue callback
+ my ($self) = @_;
+ if (my $resync_queue = $self->{-resync_queue}) {
+ if (my $ibx = shift(@$resync_queue)) {
+ on_inbox_unlock($self, $ibx);
+ PublicInbox::DS::requeue($self);
+ } else {
+ delete $self->{-resync_queue};
+ _watch_commit($self);
+ }
+ } else {
+ done($self) unless $self->{-commit_timer};
+ }
+}
+
+sub eidx_watch { # public-inbox-extindex --watch main loop
+ my ($self, $opt) = @_;
+ local %SIG = %SIG;
+ for my $sig (qw(HUP USR1 TSTP QUIT INT TERM)) {
+ $SIG{$sig} = sub { warn "SIG$sig ignored while scanning\n" };
+ }
+ require PublicInbox::InboxIdle;
+ require PublicInbox::DS;
+ require PublicInbox::Syscall;
+ require PublicInbox::Sigfd;
+ my $idler = PublicInbox::InboxIdle->new($self->{cfg});
+ if (!$self->{cfg}) {
+ $idler->watch_inbox($_) for @{$self->{ibx_list}};
+ }
+ $_->subscribe_unlock(__PACKAGE__, $self) for @{$self->{ibx_list}};
+ my $pr = $opt->{-progress};
+ $pr->("performing initial scan ...\n") if $pr;
+ my $sync = eidx_sync($self, $opt); # initial sync
+ return if $sync->{quit};
+ my $oldset = PublicInbox::DS::block_signals();
+ local $self->{current_info} = '';
+ my $cb = $SIG{__WARN__} || \&CORE::warn;
+ local $SIG{__WARN__} = sub { $cb->($self->{current_info}, ': ', @_) };
+ my $sig = {
+ HUP => sub { eidx_reload($self, $idler) },
+ USR1 => sub { eidx_resync_start($self) },
+ TSTP => sub { kill('STOP', $$) },
+ };
+ my $quit = PublicInbox::SearchIdx::quit_cb($sync);
+ $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit;
+ my $sigfd = PublicInbox::Sigfd->new($sig,
+ $PublicInbox::Syscall::SFD_NONBLOCK);
+ %SIG = (%SIG, %$sig) if !$sigfd;
+ local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock
+ if (!$sigfd) {
+ # wake up every second to accept signals if we don't
+ # have signalfd or IO::KQueue:
+ PublicInbox::DS::sig_setmask($oldset);
+ PublicInbox::DS->SetLoopTimeout(1000);
+ }
+ PublicInbox::DS->SetPostLoopCallback(sub { !$sync->{quit} });
+ $pr->("initial scan complete, entering event loop\n") if $pr;
+ PublicInbox::DS->EventLoop; # calls InboxIdle->event_step
+ done($self);
+}
+
no warnings 'once';
*done = \&PublicInbox::V2Writable::done;
*with_umask = \&PublicInbox::InboxWritable::with_umask;