]> Sergey Matveev's repositories - public-inbox.git/commitdiff
thread: remove Mail::Thread dependency
authorEric Wong <e@80x24.org>
Wed, 5 Oct 2016 23:47:17 +0000 (23:47 +0000)
committerEric Wong <e@80x24.org>
Wed, 5 Oct 2016 23:52:07 +0000 (23:52 +0000)
Introduce our own SearchThread class for threading messages.
This should allow us to specialize and optimize away objects
in future commits.

INSTALL
MANIFEST
Makefile.PL
lib/PublicInbox/SearchIdx.pm
lib/PublicInbox/SearchThread.pm [new file with mode: 0644]
lib/PublicInbox/SearchView.pm
lib/PublicInbox/Thread.pm [deleted file]
lib/PublicInbox/View.pm
lib/PublicInbox/WWW.pm
t/plack.t

diff --git a/INSTALL b/INSTALL
index 5851892c4b11f4593625b22030907f9cad5fdcbb..3a2f840caf9e55dc9aeb2161894801c3782f46c0 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -37,7 +37,6 @@ Optional components:
 Optional Perl modules:
 
   - Plack[1]                   libplack-perl
-  - Mail::Thread (2.5+)[1]     libmail-thread-perl
   - URI::Escape[1]             liburi-perl
   - Search::Xapian[2][3]       libsearch-xapian-perl
   - IO::Compress::Gzip[3]      perl-modules (or libio-compress-perl)
index c39fa261b37bdef8c9faf8209a281d5d736b7c6a..bcc41216ef2bd9d9604f04d18f44f27bf6f9d90b 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -78,11 +78,11 @@ lib/PublicInbox/SaPlugin/ListMirror.pm
 lib/PublicInbox/Search.pm
 lib/PublicInbox/SearchIdx.pm
 lib/PublicInbox/SearchMsg.pm
+lib/PublicInbox/SearchThread.pm
 lib/PublicInbox/SearchView.pm
 lib/PublicInbox/Spamcheck/Spamc.pm
 lib/PublicInbox/Spawn.pm
 lib/PublicInbox/SpawnPP.pm
-lib/PublicInbox/Thread.pm
 lib/PublicInbox/Unsubscribe.pm
 lib/PublicInbox/View.pm
 lib/PublicInbox/WWW.pm
index 4a911037c7c6ded4428d5aa2af92779bbe3b6c96..0bac7c95a6df47bfb7a14fc1d26b88e47619ce30 100644 (file)
@@ -22,7 +22,6 @@ WriteMakefile(
                'Email::MIME::ContentType' => 0,
                'Email::Simple' => 0,
                'Encode::MIME::Header' => 0,
-               'Mail::Thread' => '2.5', # 2.5+ needed for Email::Simple compat
                'Plack' => 0,
                'URI::Escape' => 0,
                # We have more test dependencies, but do not force
index 23aef9f32e959ea1ee3eb3b95957b2ebfe442a6b..4aac0281304146c39e2eb4d25cf8575c121a84a0 100644 (file)
@@ -4,8 +4,8 @@
 #
 # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
 # with the web and NNTP interfaces.  This index maintains thread
-# relationships for use by Mail::Thread.  This writes to the search
-# index.
+# relationships for use by PublicInbox::SearchThread.
+# This writes to the search index.
 package PublicInbox::SearchIdx;
 use strict;
 use warnings;
diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm
new file mode 100644 (file)
index 0000000..41fe859
--- /dev/null
@@ -0,0 +1,323 @@
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+#
+# This license differs from the rest of public-inbox
+#
+# Our own jwz-style threading class based on Mail::Thread from CPAN.
+# Mail::Thread is unmaintained and available on some distros.
+# We also do not want pruning or subject grouping, since we want
+# to encourage strict threading and hopefully encourage people
+# to use proper In-Reply-To.
+#
+# This includes fixes from several open bugs for Mail::Thread
+#
+# Avoid circular references
+# - https://rt.cpan.org/Public/Bug/Display.html?id=22817
+#
+# And avoid recursion in recurse_down:
+# - https://rt.cpan.org/Ticket/Display.html?id=116727
+# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
+package PublicInbox::SearchThread;
+use strict;
+use warnings;
+use Email::Abstract;
+
+sub new {
+       return bless {
+               messages => $_[1],
+               id_table => {},
+               rootset  => []
+       }, $_[0];
+}
+
+sub _get_hdr {
+       my ($class, $msg, $hdr) = @_;
+       Email::Abstract->get_header($msg, $hdr) || '';
+}
+
+sub _uniq {
+       my %seen;
+       return grep { !$seen{$_}++ } @_;
+}
+
+sub _references {
+       my $class = shift;
+       my $msg = shift;
+       my @references = ($class->_get_hdr($msg, "References") =~ /<([^>]+)>/g);
+       my $foo = $class->_get_hdr($msg, "In-Reply-To");
+       chomp $foo;
+       $foo =~ s/.*?<([^>]+)>.*/$1/;
+       push @references, $foo
+         if $foo =~ /^\S+\@\S+$/ && (!@references || $references[-1] ne $foo);
+       return _uniq(@references);
+}
+
+sub _msgid {
+       my ($class, $msg) = @_;
+       my $id = $class->_get_hdr($msg, "Message-ID");
+       die "attempt to thread message with no id" unless $id;
+       chomp $id;
+       $id =~ s/^<([^>]+)>.*/$1/; # We expect this not to have <>s
+       return $id;
+}
+
+sub rootset { @{$_[0]{rootset}} }
+
+sub thread {
+       my $self = shift;
+       $self->_setup();
+       $self->{rootset} = [ grep { !$_->parent } values %{$self->{id_table}} ];
+       $self->_finish();
+}
+
+sub _finish {
+       my $self = shift;
+       delete $self->{id_table};
+       delete $self->{seen};
+}
+
+sub _get_cont_for_id {
+       my $self = shift;
+       my $id = shift;
+       $self->{id_table}{$id} ||= $self->_container_class->new($id);
+}
+
+sub _container_class { 'PublicInbox::SearchThread::Container' }
+
+sub _setup {
+       my ($self) = @_;
+
+       _add_message($self, $_) foreach @{$self->{messages}};
+}
+
+sub _add_message ($$) {
+       my ($self, $message) = @_;
+
+       # A. if id_table...
+       my $this_container = $self->_get_cont_for_id($self->_msgid($message));
+       $this_container->message($message);
+
+       # B. For each element in the message's References field:
+       my @refs = $self->_references($message);
+
+       my $prev;
+       for my $ref (@refs) {
+               # Find a Container object for the given Message-ID
+               my $container = $self->_get_cont_for_id($ref);
+
+               # Link the References field's Containers together in the
+               # order implied by the References header
+               # * If they are already linked don't change the existing links
+               # * Do not add a link if adding that link would introduce
+               #   a loop...
+
+               if ($prev &&
+                       !$container->parent &&  # already linked
+                       !$container->has_descendent($prev) # would loop
+                  ) {
+                       $prev->add_child($container);
+               }
+               $prev = $container;
+       }
+
+       # C. Set the parent of this message to be the last element in
+       # References...
+       if ($prev &&
+               !$this_container->has_descendent($prev) # would loop
+          ) {
+               $prev->add_child($this_container)
+       }
+}
+
+sub order {
+       my $self = shift;
+       my $ordersub = shift;
+
+       # make a fake root
+       my $root = $self->_container_class->new( 'fakeroot' );
+       $root->add_child( $_ ) for @{ $self->{rootset} };
+
+       # sort it
+       $root->order_children( $ordersub );
+
+       # and untangle it
+       my @kids = $root->children;
+       $self->{rootset} = \@kids;
+       $root->remove_child($_) for @kids;
+}
+
+package PublicInbox::SearchThread::Container;
+use Carp qw(croak);
+use Scalar::Util qw(weaken);
+
+sub new { my $self = shift; bless { id => shift }, $self; }
+
+sub message { $_[0]->{message} = $_[1] if @_ == 2; $_[0]->{message} }
+sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
+sub child { $_[0]->{child} = $_[1] if @_ == 2; $_[0]->{child} }
+sub next { $_[0]->{next} = $_[1] if @_ == 2; $_[0]->{next} }
+sub messageid { $_[0]->{id} }
+
+sub add_child {
+       my ($self, $child) = @_;
+       croak "Cowardly refusing to become my own parent: $self"
+         if $self == $child;
+
+       if (grep { $_ == $child } $self->children) {
+               # All is potentially correct with the world
+               $child->parent($self);
+               return;
+       }
+
+       $child->parent->remove_child($child) if $child->parent;
+
+       $child->next($self->child);
+       $self->child($child);
+       $child->parent($self);
+}
+
+sub remove_child {
+       my ($self, $child) = @_;
+       return unless $self->child;
+       if ($self->child == $child) {  # First one's easy.
+               $self->child($child->next);
+               $child->next(undef);
+               $child->parent(undef);
+               return;
+       }
+
+       my $x = $self->child;
+       my $prev = $x;
+       while ($x = $x->next) {
+               if ($x == $child) {
+                       $prev->next($x->next); # Unlink x
+                       $x->next(undef);
+                       $x->parent(undef);       # Deparent it
+                       return;
+               }
+               $prev = $x;
+       }
+       # oddly, we can get here
+       $child->next(undef);
+       $child->parent(undef);
+}
+
+sub has_descendent {
+       my $self = shift;
+       my $child = shift;
+       die "Assertion failed: $child" unless eval {$child};
+       my $there = 0;
+       $self->recurse_down(sub { $there = 1 if $_[0] == $child });
+
+       return $there;
+}
+
+sub children {
+       my $self = shift;
+       my @children;
+       my $visitor = $self->child;
+       while ($visitor) {
+               push @children, $visitor;
+               $visitor = $visitor->next
+       }
+       return @children;
+}
+
+sub set_children {
+       my $self = shift;
+       my $walk = $self->child( shift );
+       while (@_) { $walk = $walk->next( shift ) }
+       $walk->next(undef) if $walk;
+}
+
+sub order_children {
+       my $self = shift;
+       my $ordersub = shift;
+
+       return unless $ordersub;
+
+       my $sub = sub {
+               my $cont = shift;
+               my @children = $cont->children;
+               return if @children < 2;
+               $cont->set_children( $ordersub->( @children ) );
+       };
+       $self->iterate_down( undef, $sub );
+       undef $sub;
+}
+
+# non-recursive version of recurse_down to avoid stack depth warnings
+sub recurse_down {
+       my ($self, $callback) = @_;
+       my %seen;
+       my @q = ($self);
+       while (my $cont = shift @q) {
+               $seen{$cont}++;
+               $callback->($cont);
+
+               if (my $next = $cont->next) {
+                       if ($seen{$next}) {
+                               $cont->next(undef);
+                       } else {
+                               push @q, $next;
+                       }
+               }
+               if (my $child = $cont->child) {
+                       if ($seen{$child}) {
+                               $cont->child(undef);
+                       } else {
+                               push @q, $child;
+                       }
+               }
+       }
+}
+
+sub iterate_down {
+       my $self = shift;
+       my ($before, $after) = @_;
+
+       my %seen;
+       my $walk = $self;
+       my $depth = 0;
+       my @visited;
+       while ($walk) {
+               push @visited, [ $walk, $depth ];
+               $before->($walk, $depth) if $before;
+
+               # spot/break loops
+               $seen{$walk}++;
+
+               my $child = $walk->child;
+               if ($child && $seen{$child}) {
+                       $walk->child(undef);
+                       $child = undef;
+               }
+
+               my $next = $walk->next;
+               if ($next && $seen{$next}) {
+                       $walk->next(undef);
+                       $next = undef;
+               }
+
+               # go down, or across
+               if ($child) {
+                       $next = $child;
+                       ++$depth;
+               }
+
+               # no next?  look up
+               if (!$next) {
+                       my $up = $walk;
+                       while ($up && !$next) {
+                               $up = $up->parent;
+                               --$depth;
+                               $next = $up->next if $up;
+                       }
+               }
+               $walk = $next;
+       }
+       return unless $after;
+       while (@visited) { $after->(@{ pop @visited }) }
+}
+
+1;
index 4f0811a8c6e9776b9036cfc6fec9774c30aa7a09..da31109357a0a579fa5c6aa04cb1bf2d1a59c4c1 100644 (file)
@@ -11,7 +11,7 @@ use PublicInbox::View;
 use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape);
 use Email::MIME;
 require PublicInbox::Git;
-require PublicInbox::Thread;
+require PublicInbox::SearchThread;
 our $LIM = 50;
 
 sub noop {}
@@ -152,7 +152,7 @@ sub mset_thread {
                $m;
        } ($mset->items);
 
-       my $th = PublicInbox::Thread->new(@m);
+       my $th = PublicInbox::SearchThread->new(\@m);
        $th->thread;
        if ($q->{r}) { # order by relevance
                $th->order(sub {
diff --git a/lib/PublicInbox/Thread.pm b/lib/PublicInbox/Thread.pm
deleted file mode 100644 (file)
index 8af9461..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-# subclass Mail::Thread and use this to workaround a memory leak
-# Based on the patch in: https://rt.cpan.org/Public/Bug/Display.html?id=22817
-#
-# Additionally, workaround for a bug where $walk->topmost returns undef:
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=795913
-# - https://rt.cpan.org/Ticket/Display.html?id=106498
-#
-# And avoid recursion in recurse_down:
-# - https://rt.cpan.org/Ticket/Display.html?id=116727
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479
-#
-# License differs from the rest of public-inbox (but is compatible):
-# This library is free software; you can redistribute it and/or modify
-# it under the same terms as Perl itself.
-package PublicInbox::Thread;
-use strict;
-use warnings;
-use base qw(Mail::Thread);
-# WARNING! both these Mail::Thread knobs were found by inspecting
-# the Mail::Thread 2.55 source code, and we have some monkey patches
-# in PublicInbox::Thread to fix memory leaks.  Since Mail::Thread
-# appears unmaintained, I suppose it's safe to depend on these
-# variables for now:
-{
-       no warnings 'once';
-       # we want strict threads to expose (and hopefully discourage)
-       # use of broken email clients
-       $Mail::Thread::nosubject = 1;
-       # Keep ghosts with only a single direct child,
-       # don't hide that there may be missing messages.
-       $Mail::Thread::noprune = 1;
-}
-
-if ($Mail::Thread::VERSION <= 2.55) {
-       eval q(sub _container_class { 'PublicInbox::Thread::Container' });
-}
-
-package PublicInbox::Thread::Container;
-use strict;
-use warnings;
-use base qw(Mail::Thread::Container);
-use Scalar::Util qw(weaken);
-sub parent { @_ == 2 ? weaken($_[0]->{parent} = $_[1]) : $_[0]->{parent} }
-
-sub topmost {
-       $_[0]->SUPER::topmost || PublicInbox::Thread::CPANRTBug106498->new;
-}
-
-# non-recursive version of recurse_down to avoid stack depth warnings
-sub recurse_down {
-       my ($self, $callback) = @_;
-       my %seen;
-       my @q = ($self);
-       while (my $cont = shift @q) {
-               $seen{$cont}++;
-               $callback->($cont);
-
-               if (my $next = $cont->next) {
-                       if ($seen{$next}) {
-                               $cont->next(undef);
-                       } else {
-                               push @q, $next;
-                       }
-               }
-               if (my $child = $cont->child) {
-                       if ($seen{$child}) {
-                               $cont->child(undef);
-                       } else {
-                               push @q, $child;
-                       }
-               }
-       }
-}
-
-# ref:
-# - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=795913
-# - https://rt.cpan.org/Ticket/Display.html?id=106498
-package PublicInbox::Thread::CPANRTBug106498;
-use strict;
-use warnings;
-
-sub new { bless {}, $_[0] }
-
-sub simple_subject {}
-
-1;
index a3b26814f6897db2268124f29365abae495433de..9f1bf46005ad66c3332ae4e875b5e71cd96254da 100644 (file)
@@ -749,8 +749,8 @@ sub msg_timestamp {
 
 sub thread_results {
        my ($msgs) = @_;
-       require PublicInbox::Thread;
-       my $th = PublicInbox::Thread->new(@$msgs);
+       require PublicInbox::SearchThread;
+       my $th = PublicInbox::SearchThread->new($msgs);
        $th->thread;
        $th->order(*sort_ts);
        $th
index 4d599fc973daec9197e1020bdc15fc260be7e0ef..11fc92e93366fcb4fcc840672f5813b69060711f 100644 (file)
@@ -112,7 +112,7 @@ sub call {
 sub preload {
        require PublicInbox::Feed;
        require PublicInbox::View;
-       require PublicInbox::Thread;
+       require PublicInbox::SearchThread;
        require Email::MIME;
        require Digest::SHA;
        require POSIX;
index 608afb9e8471804b72cda674f691a902a0bc991b..1d624589e810f63b24f927efb616aaa32da9c3ee 100644 (file)
--- a/t/plack.t
+++ b/t/plack.t
@@ -11,8 +11,7 @@ my $pi_config = "$tmpdir/config";
 my $maindir = "$tmpdir/main.git";
 my $addr = 'test-public@example.com';
 my $cfgpfx = "publicinbox.test";
-my @mods = qw(HTTP::Request::Common Plack::Test
-       Mail::Thread URI::Escape);
+my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape);
 foreach my $mod (@mods) {
        eval "require $mod";
        plan skip_all => "$mod missing for plack.t" if $@;