X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearchIdx.pm;h=c33a48c3bd2d5708d2414a008d25aabd40c3aca2;hb=95bdac7f09c69036efed537a4d03d5bdd2ae4eb6;hp=62e836e0d2d936ed849eec108cca7852b554cb14;hpb=da1ae9ccd829966195bfe59f17e416f218746def;p=public-inbox.git diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 62e836e0..c33a48c3 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2019 all contributors +# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files or flags # @@ -12,7 +12,7 @@ use warnings; use base qw(PublicInbox::Search PublicInbox::Lock); use PublicInbox::MIME; use PublicInbox::InboxWritable; -use PublicInbox::MID qw/mid_clean id_compress mid_mime mids_for_index/; +use PublicInbox::MID qw/mid_clean mid_mime mids_for_index/; use PublicInbox::MsgIter; use Carp qw(croak); use POSIX qw(strftime); @@ -34,7 +34,7 @@ sub new { ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; my $levels = qr/\A(?:full|medium|basic)\z/; my $inboxdir = $ibx->{inboxdir}; - my $version = $ibx->{version} || 1; + my $version = $ibx->version; my $indexlevel = 'full'; my $altid = $ibx->{altid}; if ($altid) { @@ -54,7 +54,7 @@ sub new { -inbox => $ibx, git => $ibx->git, -altid => $altid, - version => $version, + ibx_ver => $version, indexlevel => $indexlevel, }, $class; $ibx->umask_prepare; @@ -144,14 +144,14 @@ sub term_generator ($) { # write-only } sub index_text ($$$$) { - my ($self, $field, $n, $text) = @_; - my $tg = term_generator($self); + my ($self, $text, $wdf_inc, $prefix) = @_; + my $tg = term_generator($self); # man Search::Xapian::TermGenerator if ($self->{indexlevel} eq 'full') { - $tg->index_text($field, $n, $text); + $tg->index_text($text, $wdf_inc, $prefix); $tg->increase_termpos; } else { - $tg->index_text_without_positions($field, $n, $text); + $tg->index_text_without_positions($text, $wdf_inc, $prefix); } } @@ -199,22 +199,18 @@ sub index_old_diff_fn { } sub index_diff ($$$) { - my ($self, $lines, $doc) = @_; + my ($self, $txt, $doc) = @_; my %seen; my $in_diff; my @xnq; my $xnq = \@xnq; - foreach (@$lines) { + foreach (split(/\n/, $txt)) { if ($in_diff && s/^ //) { # diff context index_diff_inc($self, $_, 'XDFCTX', $xnq); } elsif (/^-- $/) { # email signature begins $in_diff = undef; - } elsif (m!^diff --git ("?a/.+) ("?b/.+)\z!) { - my ($fa, $fb) = ($1, $2); - my $fn = (split('/', git_unquote($fa), 2))[1]; - $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); - $fn = (split('/', git_unquote($fb), 2))[1]; - $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); + } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) { + # wait until "---" and "+++" to capture filenames $in_diff = 1; # traditional diff: } elsif (m/^diff -(.+) (\S+) (\S+)$/) { @@ -224,12 +220,12 @@ sub index_diff ($$$) { next unless $opt =~ /[uU]/; $in_diff = index_old_diff_fn($self, \%seen, $fa, $fb, $xnq); - } elsif (m!^--- ("?a/.+)!) { + } elsif (m!^--- ("?[^/]+/.+)!) { my $fn = $1; $fn = (split('/', git_unquote($fn), 2))[1]; $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); $in_diff = 1; - } elsif (m!^\+\+\+ ("?b/.+)!) { + } elsif (m!^\+\+\+ ("?[^/]+/.+)!) { my $fn = $1; $fn = (split('/', git_unquote($fn), 2))[1]; $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); @@ -278,20 +274,17 @@ sub index_diff ($$$) { } sub index_body ($$$) { - my ($self, $lines, $doc) = @_; - my $txt = join("\n", @$lines); + my ($self, $txt, $doc) = @_; if ($doc) { # does it look like a diff? if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { - $txt = undef; - index_diff($self, $lines, $doc); + index_diff($self, $txt, $doc); } else { index_text($self, $txt, 1, 'XNQ'); } } else { index_text($self, $txt, 0, 'XQUOT'); } - @$lines = (); } sub index_xapian { # msg_iter callback @@ -306,19 +299,10 @@ sub index_xapian { # msg_iter callback my ($s, undef) = msg_part_text($part, $ct); defined $s or return; - my (@orig, @quot); - my @lines = split(/\n/, $s); - while (defined(my $l = shift @lines)) { - if ($l =~ /^>/) { - index_body($self, \@orig, $doc) if @orig; - push @quot, $l; - } else { - index_body($self, \@quot, 0) if @quot; - push @orig, $l; - } - } - index_body($self, \@quot, 0) if @quot; - index_body($self, \@orig, $doc) if @orig; + # split off quoted and unquoted blocks: + my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); + $part = $s = undef; + index_body($self, $_, /\A>/ ? 0 : $doc) for @sections; } sub add_xapian ($$$$$$) { @@ -370,7 +354,7 @@ sub add_xapian ($$$$$$) { sub _msgmap_init ($) { my ($self) = @_; - die "BUG: _msgmap_init is only for v1\n" if $self->{version} != 1; + die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1; $self->{mm} //= eval { require PublicInbox::Msgmap; PublicInbox::Msgmap->new($self->{inboxdir}, 1); @@ -445,7 +429,7 @@ sub remove_message { batch_do($self, 'Q' . $mid, sub { my ($ids) = @_; $db->delete_document($_) for @$ids; - $nr = scalar @$ids; + $nr += scalar @$ids; }); }; if ($@) { @@ -678,7 +662,6 @@ sub is_ancestor ($$$) { my $cmd = [ 'git', "--git-dir=$git->{git_dir}", qw(merge-base --is-ancestor), $cur, $tip ]; my $pid = spawn($cmd); - defined $pid or die "spawning ".join(' ', @$cmd)." failed: $!"; waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish'; $? == 0; }