lib/PublicInbox/LeiDedupe.pm | 4 ++-- lib/PublicInbox/LeiOverview.pm | 18 ++++++++++-------- lib/PublicInbox/LeiToMail.pm | 3 +-- diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm index 3f478aa4a786ba3280344ec2c10de9f2beb015f7..e3ae8e33c36642be9bb57aa492884fc6487a06c4 100644 --- a/lib/PublicInbox/LeiDedupe.pm +++ b/lib/PublicInbox/LeiDedupe.pm @@ -103,8 +103,8 @@ # [ $skv, $eml_cb, $smsg_cb, "dedupe_$dd" ] bless [ $skv, undef, undef, $m ], $cls; } -# returns true on unseen messages according to the deduplication strategy, -# returns false if seen +# returns true on seen messages according to the deduplication strategy, +# returns false if unseen sub is_dup { my ($self, $eml, $oid) = @_; !$self->[1]->($eml, $oid); diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index c67e2747edfac4faeb1613abcca10a8263a73775..fa04145762c573838ee8b89e0cd2466fc6758fee 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -92,13 +92,14 @@ } else { ovv_out_lk_init($self); } } - if (!$json) { + if ($json) { + $lei->{dedupe} //= PublicInbox::LeiDedupe->new($lei); + } else { # default to the cheapest sort since MUA usually resorts $lei->{opt}->{'sort'} //= 'docid' if $dst ne '/dev/stdout'; $lei->{l2m} = eval { PublicInbox::LeiToMail->new($lei) }; return $lei->fail($@) if $@; } - $lei->{dedupe} //= PublicInbox::LeiDedupe->new($lei); $self; } @@ -201,15 +202,19 @@ } sub ovv_each_smsg_cb { # runs in wq worker usually my ($self, $lei, $ibxish) = @_; - my $json; + my ($json, $dedupe); $lei->{1}->autoflush(1); - my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing'; if (my $pkg = $self->{json}) { $json = $pkg->new; $json->utf8->canonical; $json->ascii(1) if $lei->{opt}->{ascii}; } - my $l2m = $lei->{l2m} or $dedupe->prepare_dedupe; + my $l2m = $lei->{l2m}; + if (!$l2m) { + $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing'; + $dedupe->prepare_dedupe; + } + $lei->{ovv_buf} = \(my $buf = '') if !$l2m; if ($l2m && !$ibxish) { # remote https?:// mboxrd delete $l2m->{-wq_s1}; my $g2m = $l2m->can('git_to_mail'); @@ -241,7 +246,6 @@ my $wcb = $l2m->write_cb($lei); my $git = $ibxish->git; # (LeiXSearch|Inbox|ExtSearch)->git $self->{git} = $git; # for ovv_atexit_child my $g2m = $l2m->can('git_to_mail'); - $dedupe->prepare_dedupe; sub { my ($smsg, $mitem) = @_; $smsg->{pct} = get_pct($mitem) if $mitem; @@ -249,7 +253,6 @@ $git->cat_async($smsg->{blob}, $g2m, [ $wcb, $smsg ]); }; } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) { my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},"; - $lei->{ovv_buf} = \(my $buf = ''); sub { # DIY prettiness :P my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); @@ -273,7 +276,6 @@ } } } elsif ($json) { my $ORS = $self->{fmt} eq 'json' ? ",\n" : "\n"; # JSONL - $lei->{ovv_buf} = \(my $buf = ''); sub { my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 61b546b5ec8ae13892ba5070666353b845eaba62..244bfb6746a914f6436ab3ec9d7a5696a712d0da 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -323,7 +323,7 @@ sub _maildir_write_cb ($$) { my ($self, $lei) = @_; my $dedupe = $lei->{dedupe}; - $dedupe->prepare_dedupe; + $dedupe->prepare_dedupe if $dedupe; my $dst = $lei->{ovv}->{dst}; sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; @@ -464,7 +464,6 @@ my $not_done = delete $self->{$lei->{each_smsg_not_done}}; my $wcb = $self->{wcb} //= do { # first message my %sig = $lei->atfork_child_wq($self); @SIG{keys %sig} = values %sig; # not local - $lei->{dedupe}->prepare_dedupe; $self->write_cb($lei); }; my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir);