From: Eric Wong
Date: Fri, 3 Jan 2020 08:45:59 +0000 (+0000)
Subject: searchidx: split off index_xapian for msg_iter
X-Git-Tag: v1.3.0~146
X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=60fdf3773655ab459dc52d6df8ace6555c903311

searchidx: split off index_xapian for msg_iter

This ought to save some memory, but it's probably lost in the
noise given the cost of indexing. Regardless it still reduces
the indentation level and makes future changes easier to read.
---

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 4cfbc4aa..5065974c 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -285,6 +285,33 @@ sub index_body ($$$) {
 	@$lines = ();
 }
 
+sub index_xapian { # msg_iter callback
+	my ($part, $depth, @idx) = @{$_[0]};
+	my ($self, $doc) = @{$_[1]};
+	my $ct = $part->content_type || 'text/plain';
+	my $fn = $part->filename;
+	if (defined $fn && $fn ne '') {
+		$self->index_text($fn, 1, 'XFN');
+	}
+
+	my ($s, undef) = msg_part_text($part, $ct);
+	defined $s or return;
+
+	my (@orig, @quot);
+	my @lines = split(/\n/, $s);
+	while (defined(my $l = shift @lines)) {
+		if ($l =~ /^>/) {
+			$self->index_body(\@orig, $doc) if @orig;
+			push @quot, $l;
+		} else {
+			$self->index_body(\@quot, 0) if @quot;
+			push @orig, $l;
+		}
+	}
+	$self->index_body(\@quot, 0) if @quot;
+	$self->index_body(\@orig, $doc) if @orig;
+}
+
 sub add_xapian ($$$$$) {
 	my ($self, $mime, $num, $oid, $mids, $mid0) = @_;
 	my $smsg = PublicInbox::SearchMsg->new($mime);
@@ -303,32 +330,7 @@ sub add_xapian ($$$$$) {
 	$self->index_text($subj, 1, 'S') if $subj;
 	$self->index_users($smsg);
 
-	msg_iter($mime, sub {
-		my ($part, $depth, @idx) = @{$_[0]};
-		my $ct = $part->content_type || 'text/plain';
-		my $fn = $part->filename;
-		if (defined $fn && $fn ne '') {
-			$self->index_text($fn, 1, 'XFN');
-		}
-
-		my ($s, undef) = msg_part_text($part, $ct);
-		defined $s or return;
-
-		my (@orig, @quot);
-		my @lines = split(/\n/, $s);
-		while (defined(my $l = shift @lines)) {
-			if ($l =~ /^>/) {
-				$self->index_body(\@orig, $doc) if @orig;
-				push @quot, $l;
-			} else {
-				$self->index_body(\@quot, 0) if @quot;
-				push @orig, $l;
-			}
-		}
-		$self->index_body(\@quot, 0) if @quot;
-		$self->index_body(\@orig, $doc) if @orig;
-	});
-
+	msg_iter($mime, \&index_xapian, [ $self, $doc ]);
 	foreach my $mid (@$mids) {
 		$self->index_text($mid, 1, 'XM');