Sergey Matveev's repositories - public-inbox.git/commitdiff
lei reindex: new command to reindex lei/store
author Eric Wong <e@80x24.org>
Wed, 17 Aug 2022 09:33:17 +0000 (09:33 +0000)
committer Eric Wong <e@80x24.org>
Fri, 19 Aug 2022 09:03:49 +0000 (09:03 +0000)
Documentation/lei-reindex.pod [new file with mode: 0644]
MANIFEST
lib/PublicInbox/LEI.pm
lib/PublicInbox/LeiReindex.pm [new file with mode: 0644]
lib/PublicInbox/LeiStore.pm

diff --git a/Documentation/lei-reindex.pod b/Documentation/lei-reindex.pod
new file mode 100644 (file)
index 0000000..3a5861c
--- /dev/null
@@ -0,0 +1,47 @@
+=head1 NAME
+
+lei-reindex - reindex messages already in lei/store
+
+=head1 SYNOPSIS
+
+lei reindex [OPTIONS]
+
+=head1 DESCRIPTION
+
+Forces a re-index of all messages previously indexed by L<lei-import(1)>
+or L<lei-index(1)>.  This can be used for in-place upgrades and bugfixes
+while other processes are querying the store.  Keep in mind this roughly
+doubles the size of the already-large Xapian database.
+
+It does not re-index messages in externals; using the C<--reindex>
+switch of L<public-inbox-index(1)> or L<public-inbox-extindex(1)> is
+needed for that.
+
+=head1 OPTIONS
+
+=over
+
+=item -q
+
+=item --quiet
+
+Suppress feedback messages.
+
+=back
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/> and
+L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<lei-index(1)>, L<lei-import(1)>
index cc0a9a4c20bafd4e5591781bf4648d6e8b77bcd9..27e4c4e0aebd43ed19193608ec047522f137a264 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -56,6 +56,7 @@ Documentation/lei-p2q.pod
 Documentation/lei-q.pod
 Documentation/lei-rediff.pod
 Documentation/lei-refresh-mail-sync.pod
+Documentation/lei-reindex.pod
 Documentation/lei-rm-watch.pod
 Documentation/lei-rm.pod
 Documentation/lei-security.pod
@@ -256,6 +257,7 @@ lib/PublicInbox/LeiPmdir.pm
 lib/PublicInbox/LeiQuery.pm
 lib/PublicInbox/LeiRediff.pm
 lib/PublicInbox/LeiRefreshMailSync.pm
+lib/PublicInbox/LeiReindex.pm
 lib/PublicInbox/LeiRemote.pm
 lib/PublicInbox/LeiRm.pm
 lib/PublicInbox/LeiRmWatch.pm
index 595b3fa9885fa51fe197b449c010a96a511fb35a..8a3a3ab607fe501cab30827cda019541dd655ed5 100644 (file)
@@ -253,6 +253,8 @@ our %CMD = ( # sorted in order of importance/use:
 'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch',
        qw(prune), @c_opt ],
 
+'reindex' => [ '', 'reindex all locally-indexed messages', @c_opt ],
+
 'index' => [ 'LOCATION...', 'one-time index from URL or filesystem',
        qw(in-format|F=s kw! offset=i recursive|r exclude=s include|I=s
        verbose|v+ incremental!), @net_opt, # mainly for --proxy=
diff --git a/lib/PublicInbox/LeiReindex.pm b/lib/PublicInbox/LeiReindex.pm
new file mode 100644 (file)
index 0000000..3f109f3
--- /dev/null
@@ -0,0 +1,49 @@
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei reindex" command to reindex everything in lei/store
+package PublicInbox::LeiReindex;
+use v5.12;
+use parent qw(PublicInbox::IPC);
+
+sub reindex_full { # queue one reindex_art request on the store per article number
+       my ($lei) = @_;
+       my $sto = $lei->{sto};
+       my $max = $sto->search->over(1)->max; # upper bound of the 1..$max range below
+       $lei->qerr("# reindexing 1..$max");
+       $sto->wq_do('reindex_art', $_) for (1..$max); # $_ becomes reindex_art's $art
+}
+
+sub reindex_store { # via wq_do (dispatched from lei_reindex)
+       my ($self) = @_;
+       my ($lei, $argv) = delete @$self{qw(lei argv)}; # removes both fields from $self
+       if (!@$argv) { # no arguments: reindex the whole store
+               reindex_full($lei);
+       } # a non-empty argv does nothing here (there is no else branch)
+}
+
+sub lei_reindex { # handler for "lei reindex": start a worker and dispatch reindex_store
+       my ($lei, @argv) = @_;
+       my $sto = $lei->_lei_store or return $lei->fail('nothing indexed'); # no store: fail
+       $sto->write_prepare($lei);
+       my $self = bless { lei => $lei, argv => \@argv }, __PACKAGE__; # read by reindex_store
+       my ($op_c, $ops) = $lei->workers_start($self, 1); # presumably 1 is the worker count; TODO confirm
+       $lei->{wq1} = $self;
+       $lei->wait_wq_events($op_c, $ops);
+       $self->wq_do('reindex_store'); # the actual work happens in reindex_store
+       $self->wq_close;
+}
+
+sub _lei_wq_eof { # EOF callback for main lei daemon
+       my ($lei) = @_;
+       $lei->{sto}->wq_do('reindex_done'); # have the store run reindex_done
+       $lei->wq_eof;
+}
+
+sub ipc_atfork_child { # run lei's _lei_atfork_child, then the parent class's hook
+       my ($self) = @_;
+       $self->{lei}->_lei_atfork_child;
+       $self->SUPER::ipc_atfork_child; # parent is PublicInbox::IPC (see "use parent")
+}
+
+1;
index d49746cba34f2e9c4ccb1886e2a0447616c58cf4..277ed6bdaed7b646325e15bb9871ef0736363aae 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Local storage (cache/memo) for lei(1), suitable for personal/private
@@ -335,6 +335,36 @@ sub _docids_and_maybe_kw ($$) {
        ($docids, [ sort keys %$kw ]);
 }
 
+sub _reindex_1 { # git->cat_async callback; indexes one message for reindex_art
+       my ($bref, $hex, $type, $size, $smsg) = @_;
+       my ($self, $eidx, $tl) = delete @$smsg{qw(-self -eidx -tl)}; # context stashed by reindex_art
+       $bref //= _lms_rw($self)->local_blob($hex, 1); # fallback when $bref is undef
+       if ($bref) {
+               my $eml = PublicInbox::Eml->new($bref);
+               $smsg->{-merge_vmd} = 1; # preserve existing keywords
+               $eidx->idx_shard($smsg->{num})->index_eml($eml, $smsg);
+       } else {
+               warn("E: $type $hex\n"); # neither cat_async nor local_blob produced a blob
+       }
+}
+
+sub reindex_art { # queue an async git blob read to reindex article number $art
+       my ($self, $art) = @_;
+       my ($eidx, $tl) = eidx_init($self);
+       my $smsg = $eidx->{oidx}->get_art($art) // return; # skip when get_art returns undef
+       return if $smsg->{bytes} == 0; # external-only message
+       @$smsg{qw(-self -eidx -tl)} = ($self, $eidx, $tl); # context for the _reindex_1 callback
+       $eidx->git->cat_async($smsg->{blob} // die("no blob (#$art)"),
+                               \&_reindex_1, $smsg);
+}
+
+sub reindex_done { # wait for all outstanding async git requests to complete
+       my ($self) = @_;
+       my ($eidx, $tl) = eidx_init($self); # $tl is not used in this sub
+       $eidx->git->async_wait_all;
+       # ->done to be called via sto_done_request
+}
+
 sub add_eml {
        my ($self, $eml, $vmd, $xoids) = @_;
        my $im = $self->{-fake_im} // $self->importer; # may create new epoch