X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearchIdx.pm;h=d1290dc208e37eb7689deff009da165fa5234ed1;hb=b15ca9a77bff088a3f5f0b8955de8b6a60565b04;hp=7d089e7aee4271645ae562ccf1b13ab24f6d68f7;hpb=8e1ec8836dabc58dfc0115b36ed440b4371b70d7;p=public-inbox.git diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 7d089e7a..d1290dc2 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -275,22 +275,8 @@ sub index_diff ($$$) { index_text($self, join("\n", @xnq), 1, 'XNQ'); } -sub index_body ($$$) { - my ($self, $txt, $doc) = @_; - if ($doc) { - # does it look like a diff? - if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { - index_diff($self, $txt, $doc); - } else { - index_text($self, $txt, 1, 'XNQ'); - } - } else { - index_text($self, $txt, 0, 'XQUOT'); - } -} - sub index_xapian { # msg_iter callback - my ($part, $depth, @idx) = @{$_[0]}; + my $part = $_[0]->[0]; # ignore $depth and @idx my ($self, $doc) = @{$_[1]}; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; @@ -300,11 +286,24 @@ sub index_xapian { # msg_iter callback my ($s, undef) = msg_part_text($part, $ct); defined $s or return; + $_[0]->[0] = $part = undef; # free memory # split off quoted and unquoted blocks: - my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); - $part = $s = undef; - index_body($self, $_, /\A>/ ? 0 : $doc) for @sections; + my @sections = PublicInbox::MsgIter::split_quotes($s); + undef $s; # free memory + for my $txt (@sections) { + if ($txt =~ /\A>/) { + index_text($self, $txt, 0, 'XQUOT'); + } else { + # does it look like a diff? + if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { + index_diff($self, $txt, $doc); + } else { + index_text($self, $txt, 1, 'XNQ'); + } + } + undef $txt; # free memory + } } sub add_xapian ($$$$) { @@ -497,13 +496,13 @@ sub index_git_blob_id { sub unindex_blob { my ($self, $mime) = @_; - my $mid = eval { mid_clean(mid_mime($mime)) }; + my $mid = eval { mid_mime($mime) }; $self->remove_message($mid) if defined $mid; } sub index_mm { my ($self, $mime) = @_; - my $mid = mid_clean(mid_mime($mime)); + my $mid = mid_mime($mime); my $mm = $self->{mm}; my $num; @@ -534,7 +533,7 @@ sub index_mm { sub unindex_mm { my ($self, $mime) = @_; - $self->{mm}->mid_delete(mid_clean(mid_mime($mime))); + $self->{mm}->mid_delete(mid_mime($mime)); } sub index_both { @@ -552,13 +551,11 @@ sub unindex_both { sub do_cat_mail { my ($git, $blob, $sizeref) = @_; - my $mime = eval { - my $str = $git->cat_file($blob, $sizeref); - # fixup bugs from import: - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - PublicInbox::MIME->new($str); - }; - $@ ? undef : $mime; + my $str = $git->cat_file($blob, $sizeref) or + die "BUG: $blob not found in $git->{git_dir}"; + # fixup bugs from import: + $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + PublicInbox::MIME->new($str); } # called by public-inbox-index @@ -603,7 +600,7 @@ sub read_log { } next; } - my $mime = do_cat_mail($git, $blob, \$bytes) or next; + my $mime = do_cat_mail($git, $blob, \$bytes); my $smsg = bless {}, 'PublicInbox::Smsg'; batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr); $smsg->{blob} = $blob; @@ -624,7 +621,7 @@ sub read_log { close($log) or die "git log failed: \$?=$?"; # get the leftovers foreach my $blob (keys %D) { - my $mime = do_cat_mail($git, $blob, \$bytes) or next; + my $mime = do_cat_mail($git, $blob, \$bytes); $del_cb->($self, $mime); } $batch_cb->($nr, $latest, $newest); @@ -635,7 +632,7 @@ sub _git_log { my $git = $self->{git}; if (index($range, '..') < 0) { - # don't show annoying git errrors to users who run -index + # don't show annoying git errors to users who run -index # on empty inboxes $git->qx(qw(rev-parse -q --verify), "$range^0"); if ($?) {