X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLeiStore.pm;h=c1abc288bfcb46f539f0a689fac33c53030aa065;hb=7d2e572aca7297ea2015d2b6e7c71b672521ec82;hp=56f668b8cc768015420ea5aa82f59346312f7c1b;hpb=6cdb84af2c75b3c66a35c8c4973f455da15dd0a4;p=public-inbox.git diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 56f668b8..c1abc288 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2020 all contributors +# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # # Local storage (cache/memo) for lei(1), suitable for personal/private @@ -9,20 +9,23 @@ package PublicInbox::LeiStore; use strict; use v5.10.1; -use parent qw(PublicInbox::Lock); -use PublicInbox::SearchIdx qw(crlf_adjust); +use parent qw(PublicInbox::Lock PublicInbox::IPC); use PublicInbox::ExtSearchIdx; use PublicInbox::Import; -use PublicInbox::InboxWritable; +use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::V2Writable; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::MID qw(mids); use PublicInbox::LeiSearch; +use PublicInbox::MDA; +use List::Util qw(max); sub new { my (undef, $dir, $opt) = @_; my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt); - bless { priv_eidx => $eidx }, __PACKAGE__; + my $self = bless { priv_eidx => $eidx }, __PACKAGE__; + eidx_init($self)->done if $opt->{creat}; + $self; } sub git { $_[0]->{priv_eidx}->git } # read-only @@ -37,15 +40,21 @@ sub git_pfx { "$_[0]->{priv_eidx}->{topdir}/local" }; sub git_epoch_max { my ($self) = @_; - my $pfx = $self->git_pfx; - my $max = 0; - return $max unless -d $pfx ; - opendir my $dh, $pfx or die "opendir $pfx: $!\n"; - while (defined(my $git_dir = readdir($dh))) { - $git_dir =~ m!\A([0-9]+)\.git\z! or next; - $max = $1 + 0 if $1 > $max; + if (opendir(my $dh, $self->git_pfx)) { + max(map { + substr($_, 0, -4) + 0; # drop ".git" suffix + } grep(/\A[0-9]+\.git\z/, readdir($dh))) // 0; + } else { + $!{ENOENT} ? 0 : die("opendir ${\$self->git_pfx}: $!\n"); } - $max; +} + +sub git_ident ($) { + my ($git) = @_; + chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT))); + warn "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?; + $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ ? ($1, $2) : + ('lei user', 'x@example.com') } sub importer { @@ -66,8 +75,8 @@ sub importer { while (1) { my $latest = "$pfx/$max.git"; my $old = -e $latest; + PublicInbox::Import::init_bare($latest); my $git = PublicInbox::Git->new($latest); - PublicInbox::Import::init_bare({ git => $git }); $git->qx(qw(config core.sharedRepository 0600)) if !$old; my $packed_bytes = $git->packed_bytes; my $unpacked_bytes = $packed_bytes / $self->packing_factor; @@ -75,10 +84,7 @@ sub importer { $max++; next; } - chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT))); - die "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?; - my ($n, $e) = ($i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/g) - or die "could not extract name/email from `$i'\n"; + my ($n, $e) = git_ident($git); $self->{im} = $im = PublicInbox::Import->new($git, $n, $e); $im->{bytes_added} = int($packed_bytes / $self->packing_factor); $im->{lock_path} = undef; @@ -101,11 +107,11 @@ sub eidx_init { sub _docids_for ($$) { my ($self, $eml) = @_; my %docids; - my $chash = content_hash($eml); + my ($chash, $mids) = PublicInbox::LeiSearch::content_key($eml); my $eidx = eidx_init($self); my $oidx = $eidx->{oidx}; my $im = $self->{im}; - for my $mid (@{mids($eml)}) { + for my $mid (@$mids) { my ($id, $prev); while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) { my $oid = $cur->{blob}; @@ -123,61 +129,94 @@ sub _docids_for ($$) { sort { $a <=> $b } values %docids; } -sub set_eml_keywords { - my ($self, $eml, @kw) = @_; +sub set_eml_vmd { + my ($self, $eml, $vmd) = @_; my $eidx = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { - $eidx->idx_shard($docid)->shard_set_keywords($docid, @kw); + $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd); } \@docids; } -sub add_eml_keywords { - my ($self, $eml, @kw) = @_; +sub add_eml_vmd { + my ($self, $eml, $vmd) = @_; my $eidx = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { - $eidx->idx_shard($docid)->shard_add_keywords($docid, @kw); + $eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd); } \@docids; } -sub remove_eml_keywords { - my ($self, $eml, @kw) = @_; +sub remove_eml_vmd { + my ($self, $eml, $vmd) = @_; my $eidx = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { - $eidx->idx_shard($docid)->shard_remove_keywords($docid, @kw); + $eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd); } \@docids; } sub add_eml { - my ($self, $eml) = @_; - my $eidx = eidx_init($self); + my ($self, $eml, $vmd) = @_; + my $im = $self->importer; # may create new epoch + my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates my $oidx = $eidx->{oidx}; my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; - my $im = $self->importer; $im->add($eml, undef, $smsg) or return; # duplicate returns undef - my $msgref = delete $smsg->{-raw_email}; - $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref); local $self->{current_info} = $smsg->{blob}; if (my @docids = _docids_for($self, $eml)) { for my $docid (@docids) { my $idx = $eidx->idx_shard($docid); $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); - $idx->shard_add_eidx_info($docid, '.', $eml); # List-Id + # add_eidx_info for List-Id + $idx->ipc_do('add_eidx_info', $docid, '.', $eml); + $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd; } + \@docids; } else { $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); $oidx->add_overview($eml, $smsg); $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); my $idx = $eidx->idx_shard($smsg->{num}); - $idx->index_raw($msgref, $eml, $smsg); + $idx->index_eml($eml, $smsg); + $idx->ipc_do('add_vmd', $smsg->{num}, $vmd ) if $vmd; + $smsg; + } +} + +sub set_eml { + my ($self, $eml, $vmd) = @_; + add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd); +} + +sub add_eml_maybe { + my ($self, $eml) = @_; + my $lxs = $self->{lxs_all_local} // die 'BUG: no {lxs_all_local}'; + return if $lxs->xids_for($eml, 1); + add_eml($self, $eml); +} + +# set or update keywords for external message, called via ipc_do +sub set_xkw { + my ($self, $eml, $kw) = @_; + my $lxs = $self->{lxs_all_local} // die 'BUG: no {lxs_all_local}'; + if ($lxs->xids_for($eml, 1)) { # is it in a local external? + # TODO: index keywords only + } else { + set_eml($self, $eml, { kw => $kw }); } - $smsg->{blob} +} + +sub checkpoint { + my ($self, $wait) = @_; + if (my $im = $self->{im}) { + $wait ? $im->barrier : $im->checkpoint; + } + $self->{priv_eidx}->checkpoint($wait); } sub done { @@ -194,4 +233,44 @@ sub done { die $err if $err; } +sub ipc_atfork_child { + my ($self) = @_; + my $lei = $self->{lei}; + $lei->lei_atfork_child(1) if $lei; + $self->SUPER::ipc_atfork_child; +} + +sub refresh_local_externals { + my ($self) = @_; + my $cfg = $self->{lei}->_lei_cfg or return; + my $cur_cfg = $self->{cur_cfg} // -1; + my $lxs = $self->{lxs_all_local}; + if ($cfg != $cur_cfg || !$lxs) { + $lxs = PublicInbox::LeiXSearch->new; + my @loc = $self->{lei}->externals_each; + for my $loc (@loc) { # locals only + $lxs->prepare_external($loc) if -d $loc; + } + $self->{lei}->ale->refresh_externals($lxs); + $lxs->{git} = $self->{lei}->ale->git; + $self->{lxs_all_local} = $lxs; + $self->{cur_cfg} = $cfg; + } +} + +sub write_prepare { + my ($self, $lei) = @_; + unless ($self->{-ipc_req}) { + require PublicInbox::LeiXSearch; + $self->ipc_lock_init($lei->store_path . '/ipc.lock'); + # Mail we import into lei are private, so headers filtered out + # by -mda for public mail are not appropriate + local @PublicInbox::MDA::BAD_HEADERS = (); + $self->ipc_worker_spawn('lei_store', $lei->oldset, + { lei => $lei }); + } + my $wait = $self->ipc_do('refresh_local_externals'); + $lei->{sto} = $self; +} + 1;