From: Eric Wong Date: Thu, 14 Jan 2021 07:06:21 +0000 (-1200) Subject: lei q: reinstate smsg dedupe X-Git-Tag: v1.7.0~1369 X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=97a3b1b79eda65b494e9c01bc99afb4df33a32d5;p=public-inbox.git lei q: reinstate smsg dedupe Now that dedupe is serialization- and fork-safe, we can wire it back up in our query results paths. --- diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 1a3e1193..69d2f9a6 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -26,14 +26,13 @@ sub lei_q { my $sto = $self->_lei_store(1); my $cfg = $self->_lei_cfg(1); my $opt = $self->{opt}; - require PublicInbox::LeiDedupe; - my $dd = PublicInbox::LeiDedupe->new($self); # --local is enabled by default # src: LeiXSearch || LeiSearch || Inbox my @srcs; require PublicInbox::LeiXSearch; require PublicInbox::LeiOverview; + require PublicInbox::LeiDedupe; my $lxs = PublicInbox::LeiXSearch->new; # --external is enabled by default, but allow --no-external @@ -49,8 +48,8 @@ sub lei_q { unshift(@srcs, $sto->search) if $opt->{'local'}; # no forking workers after this - require PublicInbox::LeiOverview; $self->{ovv} = PublicInbox::LeiOverview->new($self); + $self->{dd} = PublicInbox::LeiDedupe->new($self); my %mset_opt = map { $_ => $opt->{$_} } qw(thread limit offset); $mset_opt{asc} = $opt->{'reverse'} ? 
1 : 0; $mset_opt{qstr} = join(' ', map {; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 68889e81..80e7a7f7 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -103,6 +103,8 @@ sub query_thread_mset { # for --thread my $mo = { %{$lei->{mset_opt}} }; my $mset; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); + my $dd = $lei->{dd}; + $dd->prepare_dedupe; do { $mset = $srch->mset($mo->{qstr}, $mo); my $ids = $srch->mset_to_artnums($mset, $mo); @@ -112,7 +114,7 @@ sub query_thread_mset { # for --thread while ($over->expand_thread($ctx)) { for my $n (@{$ctx->{xids}}) { my $smsg = $over->get_art($n) or next; - # next if $dd->is_smsg_dup($smsg); TODO + next if $dd->is_smsg_dup($smsg); my $mitem = delete $n2item{$smsg->{num}}; $each_smsg->($smsg, $mitem); # $self->out($buf .= $ORS); @@ -132,11 +134,13 @@ sub query_mset { # non-parallel for non-"--thread" users my $mset; $self->attach_external($_) for @$srcs; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); + my $dd = $lei->{dd}; + $dd->prepare_dedupe; do { $mset = $self->mset($mo->{qstr}, $mo); for my $it ($mset->items) { my $smsg = smsg_for($self, $it) or next; - # next if $dd->is_smsg_dup($smsg); + next if $dd->is_smsg_dup($smsg); $each_smsg->($smsg, $it); # $self->out($buf .= $ORS) if defined $buf; #$emit_cb->($smsg);