From feb4a75affbe5f940116dc4f6bcc5ddb52a110b4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 7 May 2017 10:49:00 +0000 Subject: [PATCH] searchidx: fix ghost root vivification Due to the asynchronous nature of SMTP, it is possible for the root message of a thread (with no References/In-Reply-To) to arrive last in a series. We must preserve the thread_id of the ghost message in this case, as we do when vivifying non-root ghosts. Otherwise, this causes threads to be broken when the root arrives last. --- MANIFEST | 1 + lib/PublicInbox/Search.pm | 3 +- lib/PublicInbox/SearchIdx.pm | 2 +- t/search-thr-index.t | 58 ++++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 t/search-thr-index.t diff --git a/MANIFEST b/MANIFEST index f16843a9..d1e09529 100644 --- a/MANIFEST +++ b/MANIFEST @@ -157,6 +157,7 @@ t/psgi_attach.t t/psgi_mount.t t/psgi_text.t t/qspawn.t +t/search-thr-index.t t/search.t t/spamcheck_spamc.t t/spawn.t diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index bc2b6985..82a6e541 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -41,7 +41,8 @@ use constant { # 12 - change YYYYMMDD value column to numeric # 13 - fix threading for empty References/In-Reply-To # (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0) - SCHEMA_VERSION => 13, + # 14 - fix ghost root vivification + SCHEMA_VERSION => 14, # n.b. FLAG_PURE_NOT is expensive not suitable for a public website # as it could become a denial-of-service vector diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 8a529c66..e4e3c81b 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -330,7 +330,7 @@ sub link_message { merge_threads($self, $tid, $ptid); } } else { - $tid = $self->next_thread_id; + $tid = defined $old_tid ? $old_tid : $self->next_thread_id; } $doc->add_term(xpfx('thread') . 
$tid); } diff --git a/t/search-thr-index.t b/t/search-thr-index.t new file mode 100644 index 00000000..65495546 --- /dev/null +++ b/t/search-thr-index.t @@ -0,0 +1,58 @@ +# Copyright (C) 2017 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +use Email::MIME; +eval { require PublicInbox::SearchIdx; }; +plan skip_all => "Xapian missing for search" if $@; +my $tmpdir = tempdir('pi-search-thr-index.XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $git_dir = "$tmpdir/a.git"; + +is(0, system(qw(git init -q --bare), $git_dir), "git init (main)"); +my $rw = PublicInbox::SearchIdx->new($git_dir, 1); +ok($rw, "search indexer created"); +my $data = <<'EOF'; +Subject: [RFC 00/14] +Message-Id: <1-bw@g> + +Subject: [RFC 09/14] +Message-Id: <10-bw@g> +In-Reply-To: <1-bw@g> +References: <1-bw@g> + +Subject: [RFC 03/14] +Message-Id: <4-bw@g> +In-Reply-To: <1-bw@g> +References: <1-bw@g> + +EOF + +my $num = 0; +# nb. using internal API, fragile! +my $xdb = $rw->_xdb_acquire; +$xdb->begin_transaction; +my @mids; + +foreach (reverse split(/\n\n/, $data)) { + $_ .= "\n"; + my $mime = Email::MIME->new(\$_); + $mime->header_set('From' => 'bw@g'); + $mime->header_set('To' => 'git@vger.kernel.org'); + my $bytes = bytes::length($mime->as_string); + my $doc_id = $rw->add_message($mime, $bytes, ++$num, 'ignored'); + my $mid = $mime->header('Message-Id'); + push @mids, $mid; + ok($doc_id, 'message added: '. $mid); +} + +my $prev; +foreach my $mid (@mids) { + my $res = $rw->get_thread($mid); + is(3, $res->{total}, "got all messages from $mid"); +} + +done_testing(); + +1; -- 2.44.0