Sergey Matveev's repositories - public-inbox.git/blobdiff - lib/PublicInbox/LeiToMail.pm
lei2mail: parallel augment for lock-free stores
[public-inbox.git] / lib / PublicInbox / LeiToMail.pm
index 0e0b0a43bb9ddad945d66d67467c4fc6a68a5863..6efd398a76652c880637b17d0fe0194f5b1fa06a 100644 (file)
@@ -14,10 +14,12 @@ use PublicInbox::LeiDedupe;
 use PublicInbox::OnDestroy;
 use PublicInbox::Git;
 use PublicInbox::GitAsyncCat;
+use PublicInbox::PktOp qw(pkt_do);
 use Symbol qw(gensym);
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
 use Errno qw(EEXIST ESPIPE ENOENT EPIPE);
+use Digest::SHA qw(sha256_hex);
 my ($maildir_each_file);
 
 # struggles with short-lived repos, Gcf2Client makes little sense with lei;
@@ -268,7 +270,15 @@ sub _mbox_write_cb ($$) {
 }
 
 sub _augment_file { # maildir_each_file cb
-       my ($f, $lei) = @_;
+       my ($f, $lei, $mod, $shard) = @_;
+       if ($mod) {
+               # can't get dirent.d_ino w/ pure Perl, so shard on the OID
+               # in the filename if it looks like one, else SHA-256 of path:
+               my $hex = $f =~ m!\b([a-f0-9]{40,})[^/]*\z! ?
+                               $1 : sha256_hex($f);
+               my $recno = hex(substr($hex, 0, 8));
+               return if ($recno % $mod) != $shard;
+       }
        my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return;
        _augment($eml, $lei);
 }
@@ -336,7 +346,7 @@ sub _imap_write_cb ($$) {
        my $dedupe = $lei->{dedupe};
        $dedupe->prepare_dedupe if $dedupe;
        my $imap_append = $lei->{nwr}->can('imap_append');
-       my $mic = $lei->{nwr}->mic_get($lei->{ovv}->{dst});
+       my $mic = $lei->{nwr}->mic_get($self->{uri});
        my $folder = $self->{uri}->mailbox;
        sub { # for git_to_mail
                my ($bref, $smsg, $eml) = @_;
@@ -420,7 +430,9 @@ sub _do_augment_maildir {
        if ($lei->{opt}->{augment}) {
                my $dedupe = $lei->{dedupe};
                if ($dedupe && $dedupe->prepare_dedupe) {
-                       $maildir_each_file->($dst, \&_augment_file, $lei);
+                       my ($mod, $shard) = @{$self->{shard_info} // []};
+                       $maildir_each_file->($dst, \&_augment_file,
+                                               $lei, $mod, $shard);
                        $dedupe->pause_dedupe;
                }
        } else { # clobber existing Maildir
@@ -435,16 +447,15 @@ sub _augment_imap { # PublicInbox::NetReader::imap_each cb
 
 sub _do_augment_imap {
        my ($self, $lei) = @_;
-       my $dst = $lei->{ovv}->{dst};
        my $nwr = $lei->{nwr};
        if ($lei->{opt}->{augment}) {
                my $dedupe = $lei->{dedupe};
                if ($dedupe && $dedupe->prepare_dedupe) {
-                       $nwr->imap_each($dst, \&_augment_imap, $lei);
+                       $nwr->imap_each($self->{uri}, \&_augment_imap, $lei);
                        $dedupe->pause_dedupe;
                }
        } else { # clobber existing IMAP folder
-               $nwr->imap_delete_all($dst);
+               $nwr->imap_delete_all($self->{uri});
        }
 }
 
@@ -499,7 +510,7 @@ sub pre_augment { # fast (1 disk seek), runs in same process as post_augment
 
 sub do_augment { # slow, runs in wq worker
        my ($self, $lei) = @_;
-       # _do_augment_maildir, _do_augment_mbox
+       # _do_augment_maildir, _do_augment_mbox, or _do_augment_imap
        my $m = "_do_augment_$self->{base_type}";
        $self->$m($lei);
 }
@@ -516,6 +527,26 @@ sub ipc_atfork_child {
        my ($self) = @_;
        my $lei = delete $self->{lei};
        $lei->lei_atfork_child;
+       my $aug;
+       if (lock_free($self)) {
+               my $mod = $self->{-wq_nr_workers};
+               my $shard = $self->{-wq_worker_nr};
+               if (my $nwr = $lei->{nwr}) {
+                       $nwr->{shard_info} = [ $mod, $shard ];
+               } else { # Maildir (MH?)
+                       $self->{shard_info} = [ $mod, $shard ];
+               }
+               $aug = '+'; # incr_post_augment
+       } elsif ($self->{-wq_worker_nr} == 0) {
+               $aug = '.'; # do_post_augment
+       }
+       if ($aug) {
+               local $0 = 'do_augment';
+               eval { do_augment($self, $lei) };
+               $lei->fail($@) if $@;
+               pkt_do($lei->{pkt_op_p}, $aug) == 1 or
+                                       die "do_post_augment trigger: $!";
+       }
        if (my $zpipe = delete $lei->{zpipe}) {
                $lei->{1} = $zpipe->[1];
                close $zpipe->[0];