From: Eric Wong Date: Mon, 3 May 2021 20:57:31 +0000 (+0000) Subject: lei up: fix dedupe with remote externals on Maildir + IMAP X-Git-Tag: v1.7.0~697 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=d98abf13946beaf685804e4ec2d79019129415b9 lei up: fix dedupe with remote externals on Maildir + IMAP LeiToMail Maildir and IMAP write callbacks need to account for the caller-supplied smsg. We'll also make better use of the user-supplied smsg object by ensuring blob deduplication happens ASAP. Fixes: e76683309ca4f254 ("lei <q|up>: distinguish between mset and l2m counts") --- diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm index 8177c98e..92ced28b 100644 --- a/lib/PublicInbox/LeiSavedSearch.pm +++ b/lib/PublicInbox/LeiSavedSearch.pm @@ -170,23 +170,24 @@ sub cfg_set { # called by LeiXSearch sub is_dup { my ($self, $eml, $smsg) = @_; my $oidx = $self->{oidx} // die 'BUG: no {oidx}'; - my $blob = $smsg ? $smsg->{blob} : undef; - my $lk = $self->lock_for_scope_fast; - return 1 if $blob && $oidx->blob_exists($blob); + my $lk; if ($self->{-dedupe_mid}) { + $lk //= $self->lock_for_scope_fast; for my $mid (@{mids_for_index($eml)}) { my ($id, $prv); return 1 if $oidx->next_by_mid($mid, \$id, \$prv); } } + my $blob = $smsg ? $smsg->{blob} : git_sha(1, $eml)->hexdigest; + $lk //= $self->lock_for_scope_fast; + return 1 if $oidx->blob_exists($blob); if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) { for my $docid (values %$xoids) { $oidx->add_xref3($docid, -1, $blob, '.'); } $oidx->commit_lazy; if ($self->{-dedupe_oid}) { - $smsg->{blob} //= git_sha(1, $eml)->hexdigest; - exists $xoids->{$smsg->{blob}} ? 1 : undef; + exists $xoids->{$blob} ? 
1 : undef; } else { 1; } @@ -197,11 +198,11 @@ sub is_dup { $smsg->{bytes} = 0; $smsg->populate($eml); } + $smsg->{blob} //= $blob; $oidx->begin_lazy; $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); - $smsg->{blob} //= git_sha(1, $eml)->hexdigest; $oidx->add_overview($eml, $smsg); - $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); + $oidx->add_xref3($smsg->{num}, -1, $blob, '.'); $oidx->commit_lazy; undef; } diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 71acf952..64061788 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -283,7 +283,8 @@ sub _maildir_write_cb ($$) { my ($bref, $smsg, $eml) = @_; $dst // return $lei->fail; # dst may be undef-ed in last run return if $dedupe && $dedupe->is_dup($eml // - PublicInbox::Eml->new($$bref)); + PublicInbox::Eml->new($$bref), + $smsg); $lse->xsmsg_vmd($smsg) if $lse; my $n = _buf2maildir($dst, $bref // \($eml->as_string), $smsg); $sto->ipc_do('set_sync_info', $smsg->{blob}, $out, $n) if $sto; @@ -305,7 +306,8 @@ sub _imap_write_cb ($$) { my ($bref, $smsg, $eml) = @_; $mic // return $lei->fail; # mic may be undef-ed in last run return if $dedupe && $dedupe->is_dup($eml // - PublicInbox::Eml->new($$bref)); + PublicInbox::Eml->new($$bref), + $smsg); $lse->xsmsg_vmd($smsg) if $lse; my $uid = eval { $append->($mic, $folder, $bref, $smsg, $eml) }; if (my $err = $@) { diff --git a/t/lei-q-remote-import.t b/t/lei-q-remote-import.t index 32c5172b..80067061 100644 --- a/t/lei-q-remote-import.t +++ b/t/lei-q-remote-import.t @@ -91,5 +91,11 @@ EOF lei_ok(qw(q -o mboxrd:/dev/stdout m:never-before-seen@example.com)); like($lei_out, qr/seen\@example\.com>\nStatus: RO\n\nwhatever/sm, '--import-before imported totally unseen message'); + + lei_ok(qw(q --save z:0.. 
-o), "$ENV{HOME}/md", '--only', $url); + my @f = glob("$ENV{HOME}/md/*/*"); + lei_ok('up', "$ENV{HOME}/md"); + is_deeply(\@f, [ glob("$ENV{HOME}/md/*/*") ], + 'lei up remote dedupe works on maildir'); }); done_testing;