X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearchIdx.pm;h=4e951bbedc7449d899a683b8ce6f965f3d291e5e;hb=227a1d886672767e37cc86a3432952c14eb8a143;hp=62e836e0d2d936ed849eec108cca7852b554cb14;hpb=da1ae9ccd829966195bfe59f17e416f218746def;p=public-inbox.git diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 62e836e0..4e951bbe 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -12,7 +12,7 @@ use warnings; use base qw(PublicInbox::Search PublicInbox::Lock); use PublicInbox::MIME; use PublicInbox::InboxWritable; -use PublicInbox::MID qw/mid_clean id_compress mid_mime mids_for_index/; +use PublicInbox::MID qw/mid_clean mid_mime mids_for_index/; use PublicInbox::MsgIter; use Carp qw(croak); use POSIX qw(strftime); @@ -34,7 +34,7 @@ sub new { ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; my $levels = qr/\A(?:full|medium|basic)\z/; my $inboxdir = $ibx->{inboxdir}; - my $version = $ibx->{version} || 1; + my $version = $ibx->version; my $indexlevel = 'full'; my $altid = $ibx->{altid}; if ($altid) { @@ -144,14 +144,14 @@ sub term_generator ($) { # write-only } sub index_text ($$$$) { - my ($self, $field, $n, $text) = @_; - my $tg = term_generator($self); + my ($self, $text, $wdf_inc, $prefix) = @_; + my $tg = term_generator($self); # man Search::Xapian::TermGenerator if ($self->{indexlevel} eq 'full') { - $tg->index_text($field, $n, $text); + $tg->index_text($text, $wdf_inc, $prefix); $tg->increase_termpos; } else { - $tg->index_text_without_positions($field, $n, $text); + $tg->index_text_without_positions($text, $wdf_inc, $prefix); } } @@ -199,12 +199,12 @@ sub index_old_diff_fn { } sub index_diff ($$$) { - my ($self, $lines, $doc) = @_; + my ($self, $txt, $doc) = @_; my %seen; my $in_diff; my @xnq; my $xnq = \@xnq; - foreach (@$lines) { + foreach (split(/\n/, $txt)) { if ($in_diff && s/^ //) { # diff context index_diff_inc($self, $_, 'XDFCTX', $xnq); } elsif (/^-- $/) { # email signature begins @@ -278,20 +278,17 @@ sub index_diff ($$$) { } sub index_body ($$$) { - my ($self, $lines, $doc) = @_; - my $txt = join("\n", @$lines); + my ($self, $txt, $doc) = @_; if ($doc) { # does it look like a diff? if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { - $txt = undef; - index_diff($self, $lines, $doc); + index_diff($self, $txt, $doc); } else { index_text($self, $txt, 1, 'XNQ'); } } else { index_text($self, $txt, 0, 'XQUOT'); } - @$lines = (); } sub index_xapian { # msg_iter callback @@ -306,19 +303,10 @@ sub index_xapian { # msg_iter callback my ($s, undef) = msg_part_text($part, $ct); defined $s or return; - my (@orig, @quot); - my @lines = split(/\n/, $s); - while (defined(my $l = shift @lines)) { - if ($l =~ /^>/) { - index_body($self, \@orig, $doc) if @orig; - push @quot, $l; - } else { - index_body($self, \@quot, 0) if @quot; - push @orig, $l; - } - } - index_body($self, \@quot, 0) if @quot; - index_body($self, \@orig, $doc) if @orig; + # split off quoted and unquoted blocks: + my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); + $part = $s = undef; + index_body($self, $_, /\A>/ ? 0 : $doc) for @sections; } sub add_xapian ($$$$$$) { @@ -445,7 +433,7 @@ sub remove_message { batch_do($self, 'Q' . $mid, sub { my ($ids) = @_; $db->delete_document($_) for @$ids; - $nr = scalar @$ids; + $nr += scalar @$ids; }); }; if ($@) { @@ -678,7 +666,6 @@ sub is_ancestor ($$$) { my $cmd = [ 'git', "--git-dir=$git->{git_dir}", qw(merge-base --is-ancestor), $cur, $tip ]; my $pid = spawn($cmd); - defined $pid or die "spawning ".join(' ', @$cmd)." failed: $!"; waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish'; $? == 0; }