From 38c481a5438593cff686709493a70b8a6b3033d1 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 3 Oct 2017 19:43:30 +0000
Subject: [PATCH] search: try to fill in ghosts when generating thread skeleton

Since we attempt to fill in threads by Subject, our thread
skeletons can cross actual thread IDs, leading to the
possibility of false ghosts showing up in the skeleton.
Try to fill in the ghosts as well as possible by performing
a message lookup.
---
 lib/PublicInbox/Search.pm       |  2 +-
 lib/PublicInbox/SearchMsg.pm    | 16 ++++++++++++++++
 lib/PublicInbox/SearchThread.pm | 18 ++++++++++--------
 lib/PublicInbox/SearchView.pm   |  6 ++++--
 lib/PublicInbox/View.pm         | 10 +++++-----
 5 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index c7c5455d..25ab8d52 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -300,7 +300,7 @@ sub lookup_mail { # no ghosts!
 	my ($self, $mid) = @_;
 	retry_reopen($self, sub {
 		my $smsg = lookup_message($self, $mid) or return;
-		PublicInbox::SearchMsg->load_doc($smsg->{doc});
+		$smsg->load_expand;
 	});
 }
 
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index a19d45db..84e2ad51 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -29,6 +29,22 @@ sub get_val ($$) {
 	Search::Xapian::sortable_unserialise($doc->get_value($col));
 }
 
+sub load_expand {
+	my ($self) = @_;
+	my $doc = $self->{doc};
+	my $data = $doc->get_data or return;
+	$self->{ts} = get_val($doc, &PublicInbox::Search::TS);
+	utf8::decode($data);
+	my ($subj, $from, $refs, $to, $cc, $blob) = split(/\n/, $data);
+	$self->{subject} = $subj;
+	$self->{from} = $from;
+	$self->{references} = $refs;
+	$self->{to} = $to;
+	$self->{cc} = $cc;
+	$self->{blob} = $blob;
+	$self;
+}
+
 sub load_doc {
 	my ($class, $doc) = @_;
 	my $data = $doc->get_data or return;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
index 2966907a..6fbce15c 100644
--- a/lib/PublicInbox/SearchThread.pm
+++ b/lib/PublicInbox/SearchThread.pm
@@ -22,14 +22,15 @@ use strict;
 use warnings;
 
 sub thread {
-	my ($messages, $ordersub) = @_;
+	my ($messages, $ordersub, $srch) = @_;
 	my $id_table = {};
 	_add_message($id_table, $_) foreach @$messages;
 	my $rootset = [ grep {
-		!delete($_->{parent}) && $_->visible } values %$id_table ];
+		!delete($_->{parent}) && $_->visible($srch)
+	} values %$id_table ];
 	$id_table = undef;
 	$rootset = $ordersub->($rootset);
-	$_->order_children($ordersub) for @$rootset;
+	$_->order_children($ordersub, $srch) for @$rootset;
 	$rootset;
 }
 
@@ -129,20 +130,21 @@ sub has_descendent {
 # Do not show/keep ghosts iff they have no children. Sometimes
 # a ghost Message-ID is the result of a long header line
 # being folded/mangled by a MUA, and not a missing message.
-sub visible ($) {
-	my ($self) = @_;
-	$self->{smsg} || scalar values %{$self->{children}};
+sub visible ($$) {
+	my ($self, $srch) = @_;
+	($self->{smsg} ||= eval { $srch->lookup_mail($self->{id}) }) ||
+		(scalar values %{$self->{children}});
 }
 
 sub order_children {
-	my ($cur, $ordersub) = @_;
+	my ($cur, $ordersub, $srch) = @_;
 
 	my %seen = ($cur => 1); # self-referential loop prevention
 	my @q = ($cur);
 	while (defined($cur = shift @q)) {
 		my $c = $cur->{children}; # The hashref here...
 
-		$c = [ grep { !$seen{$_}++ && visible($_) } values %$c ];
+		$c = [ grep { !$seen{$_}++ && visible($_, $srch) } values %$c ];
 		$c = $ordersub->($c) if scalar @$c > 1;
 		$cur->{children} = $c; # ...becomes an arrayref
 		push @q, @$c;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index a5974034..c42cf2d6 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -207,7 +207,8 @@ sub sort_relevance {
 sub mset_thread {
 	my ($ctx, $mset, $q) = @_;
 	my %pct;
-	my $msgs = $ctx->{srch}->retry_reopen(sub { [ map {
+	my $srch = $ctx->{srch};
+	my $msgs = $srch->retry_reopen(sub { [ map {
 		my $i = $_;
 		my $smsg = PublicInbox::SearchMsg->load_doc($i->get_document);
 		$pct{$smsg->mid} = $i->get_percent;
@@ -215,7 +216,8 @@ sub mset_thread {
 	} ($mset->items) ]});
 	my $r = $q->{r};
 	my $rootset = PublicInbox::SearchThread::thread($msgs,
-		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts);
+		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts,
+		$srch);
 	my $skel = search_nav_bot($mset, $q). "
";
 	my $inbox = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index b39c8203..ac7657ae 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -370,7 +370,7 @@ sub thread_html {
 	$ctx->{mapping} = {};
 	$ctx->{s_nr} = "$nr+ messages in thread";
 
-	my $rootset = thread_results($msgs);
+	my $rootset = thread_results($msgs, $srch);
 
 	# reduce hash lookups in pre_thread->skel_dump
 	my $inbox = $ctx->{-inbox};
@@ -607,7 +607,7 @@ sub thread_skel {
 	# reduce hash lookups in skel_dump
 	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
-	walk_thread(thread_results($sres), $ctx, *skel_dump);
+	walk_thread(thread_results($sres, $srch), $ctx, *skel_dump);
 
 	$ctx->{parent_msg} = $parent;
 }
@@ -736,9 +736,9 @@ sub msg_timestamp {
 }
 
 sub thread_results {
-	my ($msgs) = @_;
+	my ($msgs, $srch) = @_;
 	require PublicInbox::SearchThread;
-	PublicInbox::SearchThread::thread($msgs, *sort_ts);
+	PublicInbox::SearchThread::thread($msgs, *sort_ts, $srch);
 }
 
 sub missing_thread {
@@ -1000,7 +1000,7 @@ sub index_topics {
 	my $nr = scalar @{$sres->{msgs}};
 	if ($nr) {
 		$sres = load_results($srch, $sres);
-		walk_thread(thread_results($sres), $ctx, *acc_topic);
+		walk_thread(thread_results($sres, $srch), $ctx, *acc_topic);
 	}
 	$ctx->{-next_o} = $off+ $nr;
 	$ctx->{-cur_o} = $off;
-- 
2.44.0