Sergey Matveev's repositories - public-inbox.git/commitdiff
search: do not iterate through entire termlist
author: Eric Wong <e@80x24.org>
Fri, 28 Aug 2015 00:00:47 +0000 (00:00 +0000)
committer: Eric Wong <e@80x24.org>
Fri, 28 Aug 2015 00:53:19 +0000 (00:53 +0000)
A document may have many terms, so this hurts performance
if we blindly iterate.  Unfortunately, we can't rely on the
order of the termlist just yet, either, so we must repeatedly
restart the search for now until we're ready to bump schema
versions.

lib/PublicInbox/SearchMsg.pm

index a9f3180b827e9c8aac8351671dce988d9b08459e..4ad8a0c95e898249fdd2b9a889bf148ff01138f4 100644 (file)
@@ -110,7 +110,6 @@ sub references_sorted {
 sub ensure_metadata {
        my ($self) = @_;
        my $doc = $self->{doc};
-       my $i = $doc->termlist_begin;
        my $end = $doc->termlist_end;
 
        unless (defined $PFX2TERM_RE) {
@@ -118,12 +117,17 @@ sub ensure_metadata {
                $PFX2TERM_RE = qr/\A($or)/;
        }
 
-       for (; $i != $end; $i->inc) {
-               my $val = $i->get_termname;
+       while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) {
+               # ideally we'd move this out of the loop:
+               my $i = $doc->termlist_begin;
 
-               if ($val =~ s/$PFX2TERM_RE//o) {
-                       my $field = $PublicInbox::Search::PFX2TERM_RMAP{$1};
-                       $self->{$field} = $val;
+               $i->skip_to($pfx);
+               if ($i != $end) {
+                       my $val = $i->get_termname;
+
+                       if ($val =~ s/$PFX2TERM_RE//o) {
+                               $self->{$field} = $val;
+                       }
                }
        }
 }