]> Sergey Matveev's repositories - public-inbox.git/commitdiff
over: ensure old, merged {tid} is really gone
authorEric Wong <e@80x24.org>
Fri, 4 Dec 2020 12:09:29 +0000 (12:09 +0000)
committerEric Wong <e@80x24.org>
Sat, 26 Dec 2020 19:45:11 +0000 (19:45 +0000)
We must use the result of link_refs() since it can trigger
merge_threads() and invalidate $old_tid.  In case
merge_threads() isn't triggered, link_refs() will return
$old_tid anyways.

When rethreading and allocating new {tid}, we also must update
the row where the now-expired {tid} came from to ensure only the
new {tid} is seen when reindexing subsequent messages in
history.  Otherwise, every subsequently reindexed+rethreaded
message could end up getting a new {tid}.

Reported-by: Kyle Meyer <kyle@kyleam.com>
Link: https://public-inbox.org/meta/87360nlc44.fsf@kyleam.com/
(cherry picked from commit 9356ec0cc5afc95a8fd398ddf898942ef0acdb74)

MANIFEST
lib/PublicInbox/OverIdx.pm
t/thread-index-gap.t [new file with mode: 0644]

index f3620de4682f98c21860ceeab4eaa0d0ee894983..6c1cdeccc7527035defab6216944828c6cf72519 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -354,6 +354,7 @@ t/solver_git.t
 t/spamcheck_spamc.t
 t/spawn.t
 t/thread-cycle.t
+t/thread-index-gap.t
 t/time.t
 t/uri_imap.t
 t/utf8.eml
index db4b7738dfd602b339072bd8d7b6db31d7e71a54..840e2c2a11aa92047a7fd0c3c51c40ebe7299337 100644 (file)
@@ -170,8 +170,14 @@ sub _resolve_mid_to_tid {
                $$tid = $cur_tid;
        } else { # rethreading, queue up dead ghosts
                $$tid = next_tid($self);
-               my $num = $smsg->{num};
-               push(@{$self->{-ghosts_to_delete}}, $num) if $num < 0;
+               my $n = $smsg->{num};
+               if ($n > 0) {
+                       $self->{dbh}->prepare_cached(<<'')->execute($$tid, $n);
+UPDATE over SET tid = ? WHERE num = ?
+
+               } elsif ($n < 0) {
+                       push(@{$self->{-ghosts_to_delete}}, $n);
+               }
        }
        1;
 }
@@ -294,7 +300,7 @@ sub _add_over {
                }
        } elsif ($n < 0) { # ghost
                $$old_tid //= $cur_valid ? $cur_tid : next_tid($self);
-               link_refs($self, $refs, $$old_tid);
+               $$old_tid = link_refs($self, $refs, $$old_tid);
                delete_by_num($self, $n);
                $$v++;
        }
diff --git a/t/thread-index-gap.t b/t/thread-index-gap.t
new file mode 100644 (file)
index 0000000..49f254e
--- /dev/null
@@ -0,0 +1,57 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable;
+use PublicInbox::Config;
+use List::Util qw(shuffle);
+require_mods(qw(DBD::SQLite));
+require_git(2.6);
+
+chomp(my @msgs = split(/\n\n/, <<'EOF')); # "git log" order
+Subject: [bug#45000] [PATCH 1/9]
+References: <20201202045335.31096-1-j@example.com>
+Message-Id: <20201202045540.31248-1-j@example.com>
+
+Subject: [bug#45000] [PATCH 0/9]
+Message-Id: <20201202045335.31096-1-j@example.com>
+
+Subject: [bug#45000] [PATCH 0/9]
+References: <20201202045335.31096-1-j@example.com>
+Message-ID: <86sg8o1mou.fsf@example.com>
+
+Subject: [bug#45000] [PATCH 8/9]
+Message-Id: <20201202045540.31248-8-j@example.com>
+References: <20201202045540.31248-1-j@example.com>
+
+EOF
+
+my ($home, $for_destroy) = tmpdir();
+local $ENV{HOME} = $home;
+for my $msgs (['orig', reverse @msgs], ['shuffle', shuffle(@msgs)]) {
+       my $desc = shift @$msgs;
+       my $n = "index-cap-$desc";
+       run_script([qw(-init -L basic -V2), $n, "$home/$n",
+               "http://example.com/$n", "$n\@example.com"]) or
+               BAIL_OUT 'init';
+       my $ibx = PublicInbox::Config->new->lookup_name($n);
+       my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
+       for my $m (@$msgs) {
+               $im->add(PublicInbox::Eml->new("$m\nFrom: x\@example.com\n\n"));
+       }
+       $im->done;
+       my $over = $ibx->over;
+       my @tid = $over->dbh->selectall_array('SELECT DISTINCT(tid) FROM over');
+       is(scalar(@tid), 1, "only one thread initially ($desc)");
+       $over->dbh_close;
+       run_script([qw(-index --reindex --rethread), $ibx->{inboxdir}]) or
+               BAIL_OUT 'rethread';
+       @tid = $over->dbh->selectall_array('SELECT DISTINCT(tid) FROM over');
+       is(scalar(@tid), 1, "only one thread after rethread ($desc)");
+}
+
+done_testing;