X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearchIdx.pm;h=579b85e3927cfd70df044db9ff9a224335e2a1d9;hb=0c586dc64b3b6642a894e125d09df446667a4079;hp=44b05813f9a43e371068071ac9b2b666c275d555;hpb=b5bc3576af3d0ef0fa884ed32a674c7a703a19b2;p=public-inbox.git diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 44b05813..579b85e3 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -58,6 +58,7 @@ sub new { ibx_ver => $version, indexlevel => $indexlevel, }, $class; + $self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium'; $ibx->umask_prepare; if ($version == 1) { $self->{lock_path} = "$inboxdir/ssoma.lock"; @@ -274,22 +275,8 @@ sub index_diff ($$$) { index_text($self, join("\n", @xnq), 1, 'XNQ'); } -sub index_body ($$$) { - my ($self, $txt, $doc) = @_; - if ($doc) { - # does it look like a diff? - if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { - index_diff($self, $txt, $doc); - } else { - index_text($self, $txt, 1, 'XNQ'); - } - } else { - index_text($self, $txt, 0, 'XQUOT'); - } -} - sub index_xapian { # msg_iter callback - my ($part, $depth, @idx) = @{$_[0]}; + my $part = $_[0]->[0]; # ignore $depth and @idx my ($self, $doc) = @{$_[1]}; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; @@ -299,11 +286,24 @@ sub index_xapian { # msg_iter callback my ($s, undef) = msg_part_text($part, $ct); defined $s or return; + $_[0]->[0] = $part = undef; # free memory # split off quoted and unquoted blocks: - my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); - $part = $s = undef; - index_body($self, $_, /\A>/ ? 0 : $doc) for @sections; + my @sections = PublicInbox::MsgIter::split_quotes($s); + undef $s; # free memory + for my $txt (@sections) { + if ($txt =~ /\A>/) { + index_text($self, $txt, 0, 'XQUOT'); + } else { + # does it look like a diff? + if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { + index_diff($self, $txt, $doc); + } else { + index_text($self, $txt, 1, 'XNQ'); + } + } + undef $txt; # free memory + } } sub add_xapian ($$$$) { @@ -496,13 +496,13 @@ sub index_git_blob_id { sub unindex_blob { my ($self, $mime) = @_; - my $mid = eval { mid_clean(mid_mime($mime)) }; + my $mid = eval { mid_mime($mime) }; $self->remove_message($mid) if defined $mid; } sub index_mm { my ($self, $mime) = @_; - my $mid = mid_clean(mid_mime($mime)); + my $mid = mid_mime($mime); my $mm = $self->{mm}; my $num; @@ -533,7 +533,7 @@ sub index_mm { sub unindex_mm { my ($self, $mime) = @_; - $self->{mm}->mid_delete(mid_clean(mid_mime($mime))); + $self->{mm}->mid_delete(mid_mime($mime)); } sub index_both { @@ -551,13 +551,9 @@ sub unindex_both { sub do_cat_mail { my ($git, $blob, $sizeref) = @_; - my $mime = eval { - my $str = $git->cat_file($blob, $sizeref); - # fixup bugs from import: - $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - PublicInbox::MIME->new($str); - }; - $@ ? undef : $mime; + my $str = $git->cat_file($blob, $sizeref) or + die "BUG: $blob not found in $git->{git_dir}"; + PublicInbox::MIME->new($str); } # called by public-inbox-index @@ -602,7 +598,7 @@ sub read_log { } next; } - my $mime = do_cat_mail($git, $blob, \$bytes) or next; + my $mime = do_cat_mail($git, $blob, \$bytes); my $smsg = bless {}, 'PublicInbox::Smsg'; batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr); $smsg->{blob} = $blob; @@ -623,7 +619,7 @@ sub read_log { close($log) or die "git log failed: \$?=$?"; # get the leftovers foreach my $blob (keys %D) { - my $mime = do_cat_mail($git, $blob, \$bytes) or next; + my $mime = do_cat_mail($git, $blob, \$bytes); $del_cb->($self, $mime); } $batch_cb->($nr, $latest, $newest); @@ -634,7 +630,7 @@ sub _git_log { my $git = $self->{git}; if (index($range, '..') < 0) { - # don't show annoying git errrors to users who run -index + # don't show annoying git errors to users who run -index # on empty inboxes $git->qx(qw(rev-parse -q --verify), "$range^0"); if ($?) { @@ -842,20 +838,27 @@ sub begin_txn_lazy { }); } +# store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard) +# This metadata is read by Admin::detect_indexlevel: +sub set_indexlevel { + my ($self) = @_; + + if (!$self->{shard} && # undef or 0, not >0 + delete($self->{-set_indexlevel_once})) { + my $xdb = $self->{xdb}; + my $level = $xdb->get_metadata('indexlevel'); + if (!$level || $level ne 'medium') { + $xdb->set_metadata('indexlevel', 'medium'); + } + } +} + sub commit_txn_lazy { my ($self) = @_; delete $self->{txn} or return; $self->{-inbox}->with_umask(sub { if (my $xdb = $self->{xdb}) { - - # store 'indexlevel=medium' in v2 shard=0 and - # v1 (only one shard) - # This metadata is read by Admin::detect_indexlevel: - if (!$self->{shard} # undef or 0, not >0 - && $self->{indexlevel} eq 'medium') { - $xdb->set_metadata('indexlevel', 'medium'); - } - + set_indexlevel($self); $xdb->commit_transaction; } $self->{over}->commit_lazy if $self->{over};