From: Eric Wong Date: Fri, 28 Aug 2015 00:00:47 +0000 (+0000) Subject: search: do not iterate through entire termlist X-Git-Tag: v1.0.0~968 X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=11820f6911d21ee1326d52d99a28063acf872911;p=public-inbox.git search: do not iterate through entire termlist A document may have many terms, so this hurts performance if we blindly iterate. Unfortunately, we can't rely on the order of the termlist just yet, either, so we must repeatedly restart the search for now until we're ready to bump schema versions. --- diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index a9f3180b..4ad8a0c9 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -110,7 +110,6 @@ sub references_sorted { sub ensure_metadata { my ($self) = @_; my $doc = $self->{doc}; - my $i = $doc->termlist_begin; my $end = $doc->termlist_end; unless (defined $PFX2TERM_RE) { @@ -118,12 +117,17 @@ sub ensure_metadata { $PFX2TERM_RE = qr/\A($or)/; } - for (; $i != $end; $i->inc) { - my $val = $i->get_termname; + while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) { + # ideally we'd move this out of the loop: + my $i = $doc->termlist_begin; - if ($val =~ s/$PFX2TERM_RE//o) { - my $field = $PublicInbox::Search::PFX2TERM_RMAP{$1}; - $self->{$field} = $val; + $i->skip_to($pfx); + if ($i != $end) { + my $val = $i->get_termname; + + if ($val =~ s/$PFX2TERM_RE//o) { + $self->{$field} = $val; + } } } }