X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLeiStore.pm;h=1cf7ffc1d206428ecb4b594620cb41a918465053;hb=b6b86cfd238c170ea3e2c4d4179f06c7af139086;hp=ae2639148918d6fc741c75f4039fdd01568021c7;hpb=86f7b16ee50081d4eed779372ccc198d8a1770dc;p=public-inbox.git diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index ae263914..1cf7ffc1 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -3,14 +3,20 @@ # # Local storage (cache/memo) for lei(1), suitable for personal/private # mail iff on encrypted device/FS. Based on v2, but only deduplicates -# based on git OID. +# git storage based on git OID (index deduplication is done in ContentHash) # # for xref3, the following are constant: $eidx_key = '.', $xnum = -1 +# +# We rely on the synchronous IPC API for this in lei-daemon and +# multiple lei clients to write to it at once. This allows the +# lei/store IPC process to be decoupled from network latency in +# lei WQ workers. package PublicInbox::LeiStore; use strict; use v5.10.1; use parent qw(PublicInbox::Lock PublicInbox::IPC); use PublicInbox::ExtSearchIdx; +use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::V2Writable; @@ -18,7 +24,10 @@ use PublicInbox::ContentHash qw(content_hash); use PublicInbox::MID qw(mids); use PublicInbox::LeiSearch; use PublicInbox::MDA; +use PublicInbox::Spawn qw(spawn); use List::Util qw(max); +use File::Temp (); +use POSIX (); sub new { my (undef, $dir, $opt) = @_; @@ -51,10 +60,19 @@ sub git_epoch_max { sub git_ident ($) { my ($git) = @_; - chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT))); - warn "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?; - $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ ? ($1, $2) : - ('lei user', 'x@example.com') + my $rdr = {}; + open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!"; + chomp(my $i = $git->qx([qw(var GIT_COMMITTER_IDENT)], undef, $rdr)); + $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ and return ($1, $2); + my ($user, undef, undef, undef, undef, undef, $gecos) = getpwuid($<); + ($user) = (($user // $ENV{USER} // '') =~ /([\w\-\.\+]+)/); + $user //= 'lei-user'; + ($gecos) = (($gecos // '') =~ /([\w\-\.\+ \t]+)/); + $gecos //= 'lei user'; + require Sys::Hostname; + my ($host) = (Sys::Hostname::hostname() =~ /([\w\-\.]+)/); + $host //= 'localhost'; + ($gecos, "$user\@$host") } sub importer { @@ -77,7 +95,10 @@ sub importer { my $old = -e $latest; PublicInbox::Import::init_bare($latest); my $git = PublicInbox::Git->new($latest); - $git->qx(qw(config core.sharedRepository 0600)) if !$old; + if (!$old) { + $git->qx(qw(config core.sharedRepository 0600)); + $self->done; # force eidx_init on next round + } my $packed_bytes = $git->packed_bytes; my $unpacked_bytes = $packed_bytes / $self->packing_factor; if ($unpacked_bytes >= $self->rotate_bytes) { @@ -97,23 +118,33 @@ sub search { PublicInbox::LeiSearch->new($_[0]->{priv_eidx}->{topdir}); } +# follows the stderr file +sub _tail_err { + my ($self) = @_; + print { $self->{-err_wr} } readline($self->{-tmp_err}); +} + sub eidx_init { my ($self) = @_; my $eidx = $self->{priv_eidx}; + my $tl = wantarray && $self->{-err_wr} ? + PublicInbox::OnDestroy->new($$, \&_tail_err, $self) : + undef; $eidx->idx_init({-private => 1}); - $eidx; + wantarray ? ($eidx, $tl) : $eidx; } sub _docids_for ($$) { my ($self, $eml) = @_; my %docids; + my $eidx = $self->{priv_eidx}; my ($chash, $mids) = PublicInbox::LeiSearch::content_key($eml); - my $eidx = eidx_init($self); my $oidx = $eidx->{oidx}; my $im = $self->{im}; for my $mid (@$mids) { my ($id, $prev); while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) { + next if $cur->{bytes} == 0; # external-only message my $oid = $cur->{blob}; my $docid = $cur->{num}; my $bref = $im ? $im->cat_blob($oid) : undef; @@ -130,18 +161,18 @@ sub _docids_for ($$) { } sub set_eml_vmd { - my ($self, $eml, $vmd) = @_; - my $eidx = eidx_init($self); - my @docids = _docids_for($self, $eml); - for my $docid (@docids) { + my ($self, $eml, $vmd, $docids) = @_; + my ($eidx, $tl) = eidx_init($self); + $docids //= [ _docids_for($self, $eml) ]; + for my $docid (@$docids) { $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd); } - \@docids; + $docids; } sub add_eml_vmd { my ($self, $eml, $vmd) = @_; - my $eidx = eidx_init($self); + my ($eidx, $tl) = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { $eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd); @@ -151,7 +182,7 @@ sub add_eml_vmd { sub remove_eml_vmd { my ($self, $eml, $vmd) = @_; - my $eidx = eidx_init($self); + my ($eidx, $tl) = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { $eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd); @@ -159,16 +190,64 @@ sub remove_eml_vmd { \@docids; } +sub set_sync_info ($$$) { + my ($self, $oidhex, $sync_info) = @_; + ($self->{lms} //= do { + require PublicInbox::LeiMailSync; + my $f = "$self->{priv_eidx}->{topdir}/mail_sync.sqlite3"; + my $lms = PublicInbox::LeiMailSync->new($f); + $lms->lms_begin; + $lms; + })->set_src($oidhex, @$sync_info); +} + sub add_eml { - my ($self, $eml, $vmd) = @_; + my ($self, $eml, $vmd, $xoids) = @_; my $im = $self->importer; # may create new epoch - my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates - my $oidx = $eidx->{oidx}; + my ($eidx, $tl) = eidx_init($self); # updates/writes alternates file + my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; - $im->add($eml, undef, $smsg) or return; # duplicate returns undef + my $im_mark = $im->add($eml, undef, $smsg); + if ($vmd && $vmd->{sync_info}) { + set_sync_info($self, $smsg->{blob}, $vmd->{sync_info}); + } + $im_mark or return; # duplicate blob returns undef local $self->{current_info} = $smsg->{blob}; - if (my @docids = _docids_for($self, $eml)) { + my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches + if ($xoids) { # fuzzy matches from externals in ale->xoids_for + delete $xoids->{$smsg->{blob}}; # added later + if (scalar keys %$xoids) { + my %docids = map { $_ => 1 } @$vivify_xvmd; + for my $oid (keys %$xoids) { + my @id = $oidx->blob_exists($oid); + @docids{@id} = @id; + } + @$vivify_xvmd = sort { $a <=> $b } keys(%docids); + } + } + if (@$vivify_xvmd) { + $xoids //= {}; + $xoids->{$smsg->{blob}} = 1; + for my $docid (@$vivify_xvmd) { + my $cur = $oidx->get_art($docid); + my $idx = $eidx->idx_shard($docid); + if (!$cur || $cur->{bytes} == 0) { # really vivifying + $smsg->{num} = $docid; + $oidx->add_overview($eml, $smsg); + $smsg->{-merge_vmd} = 1; + $idx->index_eml($eml, $smsg); + } else { # lse fuzzy hit off ale + $idx->ipc_do('add_eidx_info', $docid, '.', $eml); + } + for my $oid (keys %$xoids) { + $oidx->add_xref3($docid, -1, $oid, '.'); + } + $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd; + } + $vivify_xvmd; + } elsif (my @docids = _docids_for($self, $eml)) { + # fuzzy match from within lei/store for my $docid (@docids) { my $idx = $eidx->idx_shard($docid); $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); @@ -177,38 +256,109 @@ sub add_eml { $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd; } \@docids; - } else { + } else { # totally new message $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); $oidx->add_overview($eml, $smsg); $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); my $idx = $eidx->idx_shard($smsg->{num}); $idx->index_eml($eml, $smsg); - $idx->ipc_do('add_vmd', $smsg->{num}, $vmd ) if $vmd; + $idx->ipc_do('add_vmd', $smsg->{num}, $vmd) if $vmd; $smsg; } } sub set_eml { - my ($self, $eml, $vmd) = @_; - add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd); + my ($self, $eml, $vmd, $xoids) = @_; + add_eml($self, $eml, $vmd, $xoids) // + set_eml_vmd($self, $eml, $vmd); } -sub add_eml_maybe { - my ($self, $eml) = @_; - my $lxs = $self->{lxs_all_local} // die 'BUG: no {lxs_all_local}'; - return if $lxs->xids_for($eml, 1); - add_eml($self, $eml); +sub _external_only ($$$) { + my ($self, $xoids, $eml) = @_; + my $eidx = $self->{priv_eidx}; + my $oidx = $eidx->{oidx} // die 'BUG: {oidx} missing'; + my $smsg = bless { blob => '' }, 'PublicInbox::Smsg'; + $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); + # save space for an externals-only message + my $hdr = $eml->header_obj; + $smsg->populate($hdr); # sets lines == 0 + $smsg->{bytes} = 0; + delete @$smsg{qw(From Subject)}; + $smsg->{to} = $smsg->{cc} = $smsg->{from} = ''; + $oidx->add_overview($hdr, $smsg); # subject+references for threading + $smsg->{subject} = ''; + for my $oid (keys %$xoids) { + $oidx->add_xref3($smsg->{num}, -1, $oid, '.'); + } + my $idx = $eidx->idx_shard($smsg->{num}); + $idx->index_eml(PublicInbox::Eml->new("\n\n"), $smsg); + ($smsg, $idx); +} + +sub update_xvmd { + my ($self, $xoids, $eml, $vmd_mod) = @_; + my ($eidx, $tl) = eidx_init($self); + my $oidx = $eidx->{oidx}; + my %seen; + for my $oid (keys %$xoids) { + my @docids = $oidx->blob_exists($oid) or next; + scalar(@docids) > 1 and + warn "W: $oid indexed as multiple docids: @docids\n"; + for my $docid (@docids) { + next if $seen{$docid}++; + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('update_vmd', $docid, $vmd_mod); + } + delete $xoids->{$oid}; + } + return unless scalar(keys(%$xoids)); + + # see if it was indexed, but with different OID(s) + if (my @docids = _docids_for($self, $eml)) { + for my $docid (@docids) { + next if $seen{$docid}; + for my $oid (keys %$xoids) { + $oidx->add_xref3($docid, -1, $oid, '.'); + } + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('update_vmd', $docid, $vmd_mod); + } + return; + } + # totally unseen + my ($smsg, $idx) = _external_only($self, $xoids, $eml); + $idx->ipc_do('update_vmd', $smsg->{num}, $vmd_mod); } # set or update keywords for external message, called via ipc_do -sub set_xkw { - my ($self, $eml, $kw) = @_; - my $lxs = $self->{lxs_all_local} // die 'BUG: no {lxs_all_local}'; - if ($lxs->xids_for($eml, 1)) { # is it in a local external? - # TODO: index keywords only - } else { - set_eml($self, $eml, { kw => $kw }); +sub set_xvmd { + my ($self, $xoids, $eml, $vmd) = @_; + + my ($eidx, $tl) = eidx_init($self); + my $oidx = $eidx->{oidx}; + my %seen; + + # see if we can just update existing docs + for my $oid (keys %$xoids) { + my @docids = $oidx->blob_exists($oid) or next; + scalar(@docids) > 1 and + warn "W: $oid indexed as multiple docids: @docids\n"; + for my $docid (@docids) { + next if $seen{$docid}++; + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('set_vmd', $docid, $vmd); + } + delete $xoids->{$oid}; # all done with this oid } + return unless scalar(keys(%$xoids)); + + # n.b. we don't do _docids_for here, we expect the caller + # already checked $lse->kw_changed before calling this sub + + return unless (@{$vmd->{kw} // []}) || (@{$vmd->{L} // []}); + # totally unseen: + my ($smsg, $idx) = _external_only($self, $xoids, $eml); + $idx->ipc_do('add_vmd', $smsg->{num}, $vmd); } sub checkpoint { @@ -219,6 +369,21 @@ sub checkpoint { $self->{priv_eidx}->checkpoint($wait); } +sub xchg_stderr { + my ($self) = @_; + _tail_err($self) if $self->{-err_wr}; + my $dir = $self->{priv_eidx}->{topdir}; + return unless -e $dir; + my $old = delete $self->{-tmp_err}; + my $pfx = POSIX::strftime('%Y%m%d%H%M%S', gmtime(time)); + my $err = File::Temp->new(TEMPLATE => "$pfx.$$.lei_storeXXXX", + SUFFIX => '.err', DIR => $dir); + open STDERR, '>>', $err->filename or die "dup2: $!"; + STDERR->autoflush(1); # shared with shard subprocesses + $self->{-tmp_err} = $err; # separate file description for RO access + undef; +} + sub done { my ($self) = @_; my $err = ''; @@ -229,46 +394,48 @@ sub done { warn $err; } } - $self->{priv_eidx}->done; + if (my $lms = delete $self->{lms}) { + $lms->lms_commit; + } + $self->{priv_eidx}->done; # V2Writable::done + xchg_stderr($self); die $err if $err; } sub ipc_atfork_child { my ($self) = @_; my $lei = $self->{lei}; - $lei->lei_atfork_child(1) if $lei; - $self->SUPER::ipc_atfork_child; -} - -sub refresh_local_externals { - my ($self) = @_; - my $cfg = $self->{lei}->_lei_cfg or return; - my $cur_cfg = $self->{cur_cfg} // -1; - my $lxs = $self->{lxs_all_local}; - if ($cfg != $cur_cfg || !$lxs) { - $lxs = PublicInbox::LeiXSearch->new; - my @loc = $self->{lei}->externals_each; - for my $loc (@loc) { # locals only - $lxs->prepare_external($loc) if -d $loc; - } - $self->{lxs_all_local} = $lxs; - $self->{cur_cfg} = $cfg; + $lei->_lei_atfork_child(1) if $lei; + xchg_stderr($self); + if (my $err = delete($self->{err_pipe})) { + close $err->[0]; + $self->{-err_wr} = $err->[1]; } - ($lxs->{git_tmp} //= $lxs->git_tmp)->{git_dir}; + $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); + $self->SUPER::ipc_atfork_child; } sub write_prepare { my ($self, $lei) = @_; unless ($self->{-ipc_req}) { - require PublicInbox::LeiXSearch; - $self->ipc_lock_init; + my $d = $lei->store_path; + $self->ipc_lock_init("$d/ipc.lock"); + substr($d, -length('/lei/store'), 10, ''); + my $err_pipe; + unless ($lei->{oneshot}) { + pipe(my ($r, $w)) or die "pipe: $!"; + $err_pipe = [ $r, $w ]; + } # Mail we import into lei are private, so headers filtered out # by -mda for public mail are not appropriate local @PublicInbox::MDA::BAD_HEADERS = (); - $self->ipc_worker_spawn('lei_store', $lei->oldset, - { lei => $lei }); + $self->ipc_worker_spawn("lei/store $d", $lei->oldset, + { lei => $lei, err_pipe => $err_pipe }); + if ($err_pipe) { + require PublicInbox::LeiStoreErr; + PublicInbox::LeiStoreErr->new($err_pipe->[0], $lei); + } } - $lei->{all_ext_git_dir} = $self->ipc_do('refresh_local_externals'); $lei->{sto} = $self; }