Sergey Matveev's repositories - public-inbox.git/commitdiff
lei up: fix dedupe with remote externals on Maildir + IMAP
author Eric Wong <e@80x24.org>
Mon, 3 May 2021 20:57:31 +0000 (20:57 +0000)
committer Eric Wong <e@80x24.org>
Tue, 4 May 2021 02:35:34 +0000 (02:35 +0000)
LeiToMail Maildir and IMAP write callbacks need to account for
the caller-supplied smsg.  We'll also make better use of the
user-supplied smsg object by ensuring blob deduplication happens
ASAP.

Fixes: e76683309ca4f254 ("lei <q|up>: distinguish between mset and l2m counts")
lib/PublicInbox/LeiSavedSearch.pm
lib/PublicInbox/LeiToMail.pm
t/lei-q-remote-import.t

index 8177c98e84182ec731937aee671e312003266a91..92ced28bd9a48114456d5b1c88e872ca132c2b6a 100644 (file)
@@ -170,23 +170,24 @@ sub cfg_set { # called by LeiXSearch
 sub is_dup {
        my ($self, $eml, $smsg) = @_;
        my $oidx = $self->{oidx} // die 'BUG: no {oidx}';
-       my $blob = $smsg ? $smsg->{blob} : undef;
-       my $lk = $self->lock_for_scope_fast;
-       return 1 if $blob && $oidx->blob_exists($blob);
+       my $lk;
        if ($self->{-dedupe_mid}) {
+               $lk //= $self->lock_for_scope_fast;
                for my $mid (@{mids_for_index($eml)}) {
                        my ($id, $prv);
                        return 1 if $oidx->next_by_mid($mid, \$id, \$prv);
                }
        }
+       my $blob = $smsg ? $smsg->{blob} : git_sha(1, $eml)->hexdigest;
+       $lk //= $self->lock_for_scope_fast;
+       return 1 if $oidx->blob_exists($blob);
        if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) {
                for my $docid (values %$xoids) {
                        $oidx->add_xref3($docid, -1, $blob, '.');
                }
                $oidx->commit_lazy;
                if ($self->{-dedupe_oid}) {
-                       $smsg->{blob} //= git_sha(1, $eml)->hexdigest;
-                       exists $xoids->{$smsg->{blob}} ? 1 : undef;
+                       exists $xoids->{$blob} ? 1 : undef;
                } else {
                        1;
                }
@@ -197,11 +198,11 @@ sub is_dup {
                        $smsg->{bytes} = 0;
                        $smsg->populate($eml);
                }
+               $smsg->{blob} //= $blob;
                $oidx->begin_lazy;
                $smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
-               $smsg->{blob} //= git_sha(1, $eml)->hexdigest;
                $oidx->add_overview($eml, $smsg);
-               $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
+               $oidx->add_xref3($smsg->{num}, -1, $blob, '.');
                $oidx->commit_lazy;
                undef;
        }
index 71acf952c046fc0c984b2205110723afba4df84f..64061788eae05c219f71de5d5bea974819b2baee 100644 (file)
@@ -283,7 +283,8 @@ sub _maildir_write_cb ($$) {
                my ($bref, $smsg, $eml) = @_;
                $dst // return $lei->fail; # dst may be undef-ed in last run
                return if $dedupe && $dedupe->is_dup($eml //
-                                               PublicInbox::Eml->new($$bref));
+                                               PublicInbox::Eml->new($$bref),
+                                               $smsg);
                $lse->xsmsg_vmd($smsg) if $lse;
                my $n = _buf2maildir($dst, $bref // \($eml->as_string), $smsg);
                $sto->ipc_do('set_sync_info', $smsg->{blob}, $out, $n) if $sto;
@@ -305,7 +306,8 @@ sub _imap_write_cb ($$) {
                my ($bref, $smsg, $eml) = @_;
                $mic // return $lei->fail; # mic may be undef-ed in last run
                return if $dedupe && $dedupe->is_dup($eml //
-                                               PublicInbox::Eml->new($$bref));
+                                               PublicInbox::Eml->new($$bref),
+                                               $smsg);
                $lse->xsmsg_vmd($smsg) if $lse;
                my $uid = eval { $append->($mic, $folder, $bref, $smsg, $eml) };
                if (my $err = $@) {
index 32c5172bdea63051a758da84773a603e1864742a..800670610f5129ab2d9f027e5243ac8b849bba68 100644 (file)
@@ -91,5 +91,11 @@ EOF
        lei_ok(qw(q -o mboxrd:/dev/stdout m:never-before-seen@example.com));
        like($lei_out, qr/seen\@example\.com>\nStatus: RO\n\nwhatever/sm,
                '--import-before imported totally unseen message');
+
+       lei_ok(qw(q --save z:0.. -o), "$ENV{HOME}/md", '--only', $url);
+       my @f = glob("$ENV{HOME}/md/*/*");
+       lei_ok('up', "$ENV{HOME}/md");
+       is_deeply(\@f, [ glob("$ENV{HOME}/md/*/*") ],
+               'lei up remote dedupe works on maildir');
 });
 done_testing;