X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLeiStore.pm;h=b4f40912aa61e5e6d1722105b1b53a30df4f228c;hb=ee355020415fbbebbeb8fbe79be4e4f3fa2e657b;hp=d2dd4e7bfec0d4bb5e2524b71ad242e6b7900be3;hpb=680a817ef1627bb8e149fd1967ecc05a8d634dc9;p=public-inbox.git diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index d2dd4e7b..b4f40912 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -3,9 +3,14 @@ # # Local storage (cache/memo) for lei(1), suitable for personal/private # mail iff on encrypted device/FS. Based on v2, but only deduplicates -# based on git OID. +# git storage based on git OID (index deduplication is done in ContentHash) # # for xref3, the following are constant: $eidx_key = '.', $xnum = -1 +# +# We rely on the synchronous IPC API for this in lei-daemon and +# multiple lei clients to write to it at once. This allows the +# lei/store IPC process to be decoupled from network latency in +# lei WQ workers. package PublicInbox::LeiStore; use strict; use v5.10.1; @@ -19,7 +24,14 @@ use PublicInbox::ContentHash qw(content_hash); use PublicInbox::MID qw(mids); use PublicInbox::LeiSearch; use PublicInbox::MDA; -use List::Util qw(max); +use PublicInbox::Spawn qw(spawn); +use PublicInbox::MdirReader; +use PublicInbox::LeiToMail; +use File::Temp (); +use POSIX (); +use IO::Handle (); # ->autoflush +use Sys::Syslog qw(syslog openlog); +use Errno qw(EEXIST ENOENT); sub new { my (undef, $dir, $opt) = @_; @@ -37,26 +49,21 @@ sub rotate_bytes { $_[0]->{rotate_bytes} // ((1024 * 1024 * 1024) / $_[0]->packing_factor) } -sub git_pfx { "$_[0]->{priv_eidx}->{topdir}/local" }; - -sub git_epoch_max { - my ($self) = @_; - if (opendir(my $dh, $self->git_pfx)) { - max(map { - substr($_, 0, -4) + 0; # drop ".git" suffix - } grep(/\A[0-9]+\.git\z/, readdir($dh))) // 0; - } else { - $!{ENOENT} ? 0 : die("opendir ${\$self->git_pfx}: $!\n"); - } -} - sub git_ident ($) { my ($git) = @_; my $rdr = {}; open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!"; chomp(my $i = $git->qx([qw(var GIT_COMMITTER_IDENT)], undef, $rdr)); - $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ ? ($1, $2) : - ('lei user', 'x@example.com') + $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ and return ($1, $2); + my ($user, undef, undef, undef, undef, undef, $gecos) = getpwuid($<); + ($user) = (($user // $ENV{USER} // '') =~ /([\w\-\.\+]+)/); + $user //= 'lei-user'; + ($gecos) = (($gecos // '') =~ /([\w\-\.\+ \t]+)/); + $gecos //= 'lei user'; + require Sys::Hostname; + my ($host) = (Sys::Hostname::hostname() =~ /([\w\-\.]+)/); + $host //= 'localhost'; + ($gecos, "$user\@$host") } sub importer { @@ -70,16 +77,16 @@ sub importer { $im->done; undef $im; $self->checkpoint; - $max = $self->git_epoch_max + 1; + $max = $self->{priv_eidx}->{mg}->git_epochs + 1; } - my $pfx = $self->git_pfx; - $max //= $self->git_epoch_max; + my (undef, $tl) = eidx_init($self); # acquire lock + $max //= $self->{priv_eidx}->{mg}->git_epochs; while (1) { - my $latest = "$pfx/$max.git"; - my $old = -e $latest; - PublicInbox::Import::init_bare($latest); + my $latest = $self->{priv_eidx}->{mg}->add_epoch($max); my $git = PublicInbox::Git->new($latest); - $git->qx(qw(config core.sharedRepository 0600)) if !$old; + $self->done; # unlock + # re-acquire lock, update alternates for new epoch + (undef, $tl) = eidx_init($self); my $packed_bytes = $git->packed_bytes; my $unpacked_bytes = $packed_bytes / $self->packing_factor; if ($unpacked_bytes >= $self->rotate_bytes) { @@ -99,18 +106,27 @@ sub search { PublicInbox::LeiSearch->new($_[0]->{priv_eidx}->{topdir}); } +# follows the stderr file +sub _tail_err { + my ($self) = @_; + print { $self->{-err_wr} } readline($self->{-tmp_err}); +} + sub eidx_init { my ($self) = @_; my $eidx = $self->{priv_eidx}; - $eidx->idx_init({-private => 1}); - $eidx; + my $tl = wantarray && $self->{-err_wr} ? + PublicInbox::OnDestroy->new($$, \&_tail_err, $self) : + undef; + $eidx->idx_init({-private => 1}); # acquires lock + wantarray ? ($eidx, $tl) : $eidx; } sub _docids_for ($$) { my ($self, $eml) = @_; my %docids; + my $eidx = $self->{priv_eidx}; my ($chash, $mids) = PublicInbox::LeiSearch::content_key($eml); - my $eidx = eidx_init($self); my $oidx = $eidx->{oidx}; my $im = $self->{im}; for my $mid (@$mids) { @@ -120,7 +136,8 @@ sub _docids_for ($$) { my $oid = $cur->{blob}; my $docid = $cur->{num}; my $bref = $im ? $im->cat_blob($oid) : undef; - $bref //= $eidx->git->cat_file($oid) // do { + $bref //= $eidx->git->cat_file($oid) // + _lms_rw($self)->local_blob($oid, 1) // do { warn "W: $oid (#$docid) <$mid> not found\n"; next; }; @@ -132,19 +149,95 @@ sub _docids_for ($$) { sort { $a <=> $b } values %docids; } +# n.b. similar to LeiExportKw->export_kw_md, but this is for a single eml +sub export1_kw_md ($$$$$) { + my ($self, $mdir, $bn, $oidbin, $vmdish) = @_; # vmd/vmd_mod + my $orig = $bn; + my (@try, $unkn, $kw); + if ($bn =~ s/:2,([a-zA-Z]*)\z//) { + ($kw, $unkn) = PublicInbox::MdirReader::flags2kw($1); + if (my $set = $vmdish->{kw}) { + $kw = $set; + } elsif (my $add = $vmdish->{'+kw'}) { + @$kw{@$add} = (); + } elsif (my $del = $vmdish->{-kw}) { + delete @$kw{@$del}; + } # else no changes... + @try = qw(cur new); + } else { # no keywords, yet, could be in new/ + @try = qw(new cur); + $unkn = []; + if (my $set = $vmdish->{kw}) { + $kw = $set; + } elsif (my $add = $vmdish->{'+kw'}) { + @$kw{@$add} = (); # auto-vivify + } else { # ignore $vmdish->{-kw} + $kw = []; + } + } + $kw = [ keys %$kw ] if ref($kw) eq 'HASH'; + $bn .= ':2,'. PublicInbox::LeiToMail::kw2suffix($kw, @$unkn); + return if $orig eq $bn; # no change + + # we use link(2) + unlink(2) since rename(2) may + # inadvertently clobber if the "uniquefilename" part wasn't + # actually unique. + my $dst = "$mdir/cur/$bn"; + for my $d (@try) { + my $src = "$mdir/$d/$orig"; + if (link($src, $dst)) { + if (!unlink($src) and $! != ENOENT) { + syslog('warning', "unlink($src): $!"); + } + # TODO: verify oidbin? + $self->{lms}->mv_src("maildir:$mdir", + $oidbin, \$orig, $bn); + return; + } elsif ($! == EEXIST) { # lost race with "lei export-kw"? + return; + } elsif ($! != ENOENT) { + syslog('warning', "link($src -> $dst): $!"); + } + } + for (@try) { return if -e "$mdir/$_/$orig" }; + $self->{lms}->clear_src("maildir:$mdir", \$orig); +} + +sub sto_export_kw ($$$) { + my ($self, $docid, $vmdish) = @_; # vmdish (vmd or vmd_mod) + my ($eidx, $tl) = eidx_init($self); + my $lms = _lms_rw($self) // return; + my $xr3 = $eidx->{oidx}->get_xref3($docid, 1); + for my $row (@$xr3) { + my (undef, undef, $oidbin) = @$row; + my $locs = $lms->locations_for($oidbin) // next; + while (my ($loc, $ids) = each %$locs) { + if ($loc =~ s!\Amaildir:!!i) { + for my $id (@$ids) { + export1_kw_md($self, $loc, $id, + $oidbin, $vmdish); + } + } + # TODO: IMAP + } + } +} + +# vmd = { kw => [ qw(seen ...) ], L => [ qw(inbox ...) ] } sub set_eml_vmd { my ($self, $eml, $vmd, $docids) = @_; - my $eidx = eidx_init($self); + my ($eidx, $tl) = eidx_init($self); $docids //= [ _docids_for($self, $eml) ]; for my $docid (@$docids) { $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd); + sto_export_kw($self, $docid, $vmd); } $docids; } sub add_eml_vmd { my ($self, $eml, $vmd) = @_; - my $eidx = eidx_init($self); + my ($eidx, $tl) = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { $eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd); @@ -152,9 +245,9 @@ sub add_eml_vmd { \@docids; } -sub remove_eml_vmd { +sub remove_eml_vmd { # remove just the VMD my ($self, $eml, $vmd) = @_; - my $eidx = eidx_init($self); + my ($eidx, $tl) = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { $eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd); @@ -162,13 +255,92 @@ sub remove_eml_vmd { \@docids; } +sub _lms_rw ($) { # it is important to have eidx processes open before lms + my ($self) = @_; + my ($eidx, $tl) = eidx_init($self); + $self->{lms} //= do { + require PublicInbox::LeiMailSync; + my $f = "$self->{priv_eidx}->{topdir}/mail_sync.sqlite3"; + my $lms = PublicInbox::LeiMailSync->new($f); + $lms->lms_write_prepare; + $lms; + }; +} + +sub set_sync_info { + my ($self, $oidhex, $folder, $id) = @_; + _lms_rw($self)->set_src(pack('H*', $oidhex), $folder, $id); +} + +sub _remove_if_local { # git->cat_async arg + my ($bref, $oidhex, $type, $size, $self) = @_; + $self->{im}->remove($bref) if $bref; +} + +sub remove_docids ($;@) { + my ($self, @docids) = @_; + my $eidx = eidx_init($self); + for my $docid (@docids) { + $eidx->idx_shard($docid)->ipc_do('xdb_remove', $docid); + $eidx->{oidx}->delete_by_num($docid); + $eidx->{oidx}->{dbh}->do(<importer; # may create new epoch + my ($eidx, $tl) = eidx_init($self); + my $oidx = $eidx->{oidx}; + my @docids = _docids_for($self, $eml); + my $git = $eidx->git; + for my $docid (@docids) { + my $xr3 = $oidx->get_xref3($docid, 1); + for my $row (@$xr3) { + my (undef, undef, $oidbin) = @$row; + my $oidhex = unpack('H*', $oidbin); + $git->cat_async($oidhex, \&_remove_if_local, $self); + } + } + $git->cat_async_wait; + remove_docids($self, @docids); + \@docids; +} + +sub oid2docid ($$) { + my ($self, $oid) = @_; + my $eidx = eidx_init($self); + my ($docid, @cull) = $eidx->{oidx}->blob_exists($oid); + if (@cull) { # fixup old bugs... + warn <ipc_do('add_vmd', $docid, $vmd); + sto_export_kw($self, $docid, $vmd); +} + sub add_eml { my ($self, $eml, $vmd, $xoids) = @_; - my $im = $self->importer; # may create new epoch - my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates + my $im = $self->{-fake_im} // $self->importer; # may create new epoch + my ($eidx, $tl) = eidx_init($self); my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; - $im->add($eml, undef, $smsg) or return; # duplicate returns undef + $smsg->{-eidx_git} = $eidx->git if !$self->{-fake_im}; + my $im_mark = $im->add($eml, undef, $smsg); + if ($vmd && $vmd->{sync_info}) { + set_sync_info($self, $smsg->{blob}, @{$vmd->{sync_info}}); + } + $im_mark or return; # duplicate blob returns undef local $self->{current_info} = $smsg->{blob}; my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches @@ -177,13 +349,13 @@ sub add_eml { if (scalar keys %$xoids) { my %docids = map { $_ => 1 } @$vivify_xvmd; for my $oid (keys %$xoids) { - my @id = $oidx->blob_exists($oid); - @docids{@id} = @id; + my $docid = oid2docid($self, $oid); + $docids{$docid} = $docid if defined($docid); } @$vivify_xvmd = sort { $a <=> $b } keys(%docids); } } - if (@$vivify_xvmd) { + if (@$vivify_xvmd) { # docids list $xoids //= {}; $xoids->{$smsg->{blob}} = 1; for my $docid (@$vivify_xvmd) { @@ -200,7 +372,7 @@ sub add_eml { for my $oid (keys %$xoids) { $oidx->add_xref3($docid, -1, $oid, '.'); } - $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd; + _add_vmd($self, $idx, $docid, $vmd) if $vmd; } $vivify_xvmd; } elsif (my @docids = _docids_for($self, $eml)) { @@ -210,16 +382,17 @@ sub add_eml { $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); # add_eidx_info for List-Id $idx->ipc_do('add_eidx_info', $docid, '.', $eml); - $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd; + _add_vmd($self, $idx, $docid, $vmd) if $vmd; } \@docids; } else { # totally new message + delete $smsg->{-oidx}; # for IPC-friendliness $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); $oidx->add_overview($eml, $smsg); $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); my $idx = $eidx->idx_shard($smsg->{num}); $idx->index_eml($eml, $smsg); - $idx->ipc_do('add_vmd', $smsg->{num}, $vmd) if $vmd; + _add_vmd($self, $idx, $smsg->{num}, $vmd) if $vmd; $smsg; } } @@ -230,6 +403,14 @@ sub set_eml { set_eml_vmd($self, $eml, $vmd); } +sub index_eml_only { + my ($self, $eml, $vmd, $xoids) = @_; + require PublicInbox::FakeImport; + local $self->{-fake_im} = PublicInbox::FakeImport->new; + set_eml($self, $eml, $vmd, $xoids); +} + +# store {kw} / {L} info for a message which is only in an external sub _external_only ($$$) { my ($self, $xoids, $eml) = @_; my $eidx = $self->{priv_eidx}; @@ -254,19 +435,16 @@ sub _external_only ($$$) { sub update_xvmd { my ($self, $xoids, $eml, $vmd_mod) = @_; - my $eidx = eidx_init($self); + my ($eidx, $tl) = eidx_init($self); my $oidx = $eidx->{oidx}; my %seen; for my $oid (keys %$xoids) { - my @docids = $oidx->blob_exists($oid) or next; - scalar(@docids) > 1 and - warn "W: $oid indexed as multiple docids: @docids\n"; - for my $docid (@docids) { - next if $seen{$docid}++; - my $idx = $eidx->idx_shard($docid); - $idx->ipc_do('update_vmd', $docid, $vmd_mod); - } + my $docid = oid2docid($self, $oid) // next; delete $xoids->{$oid}; + next if $seen{$docid}++; + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('update_vmd', $docid, $vmd_mod); + sto_export_kw($self, $docid, $vmd_mod); } return unless scalar(keys(%$xoids)); @@ -279,33 +457,32 @@ sub update_xvmd { } my $idx = $eidx->idx_shard($docid); $idx->ipc_do('update_vmd', $docid, $vmd_mod); + sto_export_kw($self, $docid, $vmd_mod); } return; } # totally unseen my ($smsg, $idx) = _external_only($self, $xoids, $eml); $idx->ipc_do('update_vmd', $smsg->{num}, $vmd_mod); + sto_export_kw($self, $smsg->{num}, $vmd_mod); } # set or update keywords for external message, called via ipc_do sub set_xvmd { my ($self, $xoids, $eml, $vmd) = @_; - my $eidx = eidx_init($self); + my ($eidx, $tl) = eidx_init($self); my $oidx = $eidx->{oidx}; my %seen; # see if we can just update existing docs for my $oid (keys %$xoids) { - my @docids = $oidx->blob_exists($oid) or next; - scalar(@docids) > 1 and - warn "W: $oid indexed as multiple docids: @docids\n"; - for my $docid (@docids) { - next if $seen{$docid}++; - my $idx = $eidx->idx_shard($docid); - $idx->ipc_do('set_vmd', $docid, $vmd); - } + my $docid = oid2docid($self, $oid) // next; delete $xoids->{$oid}; # all done with this oid + next if $seen{$docid}++; + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('set_vmd', $docid, $vmd); + sto_export_kw($self, $docid, $vmd); } return unless scalar(keys(%$xoids)); @@ -316,6 +493,7 @@ sub set_xvmd { # totally unseen: my ($smsg, $idx) = _external_only($self, $xoids, $eml); $idx->ipc_do('add_vmd', $smsg->{num}, $vmd); + sto_export_kw($self, $smsg->{num}, $vmd); } sub checkpoint { @@ -323,11 +501,27 @@ sub checkpoint { if (my $im = $self->{im}) { $wait ? $im->barrier : $im->checkpoint; } + delete $self->{lms}; $self->{priv_eidx}->checkpoint($wait); } -sub done { +sub xchg_stderr { my ($self) = @_; + _tail_err($self) if $self->{-err_wr}; + my $dir = $self->{priv_eidx}->{topdir}; + return unless -e $dir; + my $old = delete $self->{-tmp_err}; + my $pfx = POSIX::strftime('%Y%m%d%H%M%S', gmtime(time)); + my $err = File::Temp->new(TEMPLATE => "$pfx.$$.lei_storeXXXX", + SUFFIX => '.err', DIR => $dir); + open STDERR, '>>', $err->filename or die "dup2: $!"; + STDERR->autoflush(1); # shared with shard subprocesses + $self->{-tmp_err} = $err; # separate file description for RO access + undef; +} + +sub done { + my ($self, $sock_ref) = @_; my $err = ''; if (my $im = delete($self->{im})) { eval { $im->done }; @@ -336,7 +530,9 @@ sub done { warn $err; } } - $self->{priv_eidx}->done; + delete $self->{lms}; + $self->{priv_eidx}->done; # V2Writable::done + xchg_stderr($self); die $err if $err; } @@ -344,21 +540,38 @@ sub ipc_atfork_child { my ($self) = @_; my $lei = $self->{lei}; $lei->_lei_atfork_child(1) if $lei; - $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); + xchg_stderr($self); + if (my $to_close = delete($self->{to_close})) { + close($_) for @$to_close; + } + openlog('lei/store', 'pid,nowait,nofatal,ndelay', 'user'); $self->SUPER::ipc_atfork_child; } +sub recv_and_run { + my ($self, @args) = @_; + local $PublicInbox::DS::in_loop = 0; # waitpid synchronously + $self->SUPER::recv_and_run(@args); +} + sub write_prepare { my ($self, $lei) = @_; + $lei // die 'BUG: $lei not passed'; unless ($self->{-ipc_req}) { - my $d = $lei->store_path; - $self->ipc_lock_init("$d/ipc.lock"); - substr($d, -length('/lei/store'), 10, ''); + my $dir = $lei->store_path; + substr($dir, -length('/lei/store'), 10, ''); + pipe(my ($r, $w)) or die "pipe: $!"; + $w->autoflush(1); # Mail we import into lei are private, so headers filtered out # by -mda for public mail are not appropriate local @PublicInbox::MDA::BAD_HEADERS = (); - $self->ipc_worker_spawn("lei/store $d", $lei->oldset, - { lei => $lei }); + $self->wq_workers_start("lei/store $dir", 1, $lei->oldset, { + lei => $lei, + -err_wr => $w, + to_close => [ $r ], + }); + require PublicInbox::LeiStoreErr; + PublicInbox::LeiStoreErr->new($r, $lei); } $lei->{sto} = $self; }