use base qw(PublicInbox::Search PublicInbox::Lock);
use PublicInbox::MIME;
use PublicInbox::InboxWritable;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mids_for_index/;
+use PublicInbox::MID qw/mid_clean mid_mime mids_for_index/;
use PublicInbox::MsgIter;
use Carp qw(croak);
use POSIX qw(strftime);
}
# index_text: hand a chunk of text to the term generator returned by
# term_generator($self).
#
# With this change the arguments are ($self, $text, $wdf_inc, $prefix) and
# are forwarded to the TermGenerator call in that same order; the removed
# lines took ($self, $field, $n, $text) and forwarded those instead.
#
# When $self->{indexlevel} is 'full', index_text() is called and followed
# by increase_termpos; for any other index level
# index_text_without_positions() is called instead.
sub index_text ($$$$) {
- my ($self, $field, $n, $text) = @_;
- my $tg = term_generator($self);
+ my ($self, $text, $wdf_inc, $prefix) = @_;
+ my $tg = term_generator($self); # man Search::Xapian::TermGenerator
if ($self->{indexlevel} eq 'full') {
- $tg->index_text($field, $n, $text);
+ $tg->index_text($text, $wdf_inc, $prefix);
$tg->increase_termpos;
} else {
- $tg->index_text_without_positions($field, $n, $text);
+ $tg->index_text_without_positions($text, $wdf_inc, $prefix);
}
}
}
# index_diff: only the opening of this sub is visible in this excerpt; the
# remainder of the loop body is elided, so the notes below cover the
# visible lines only.
#
# With this change the second argument is a single string ($txt) that is
# split on "\n" here, rather than an array ref of lines ($lines).
# While $in_diff is set, a line starting with a single space has that
# space stripped and is passed to index_diff_inc() with the 'XDFCTX'
# prefix and the \@xnq accumulator.  A line consisting of exactly "-- "
# is matched as the start of an email signature.
sub index_diff ($$$) {
- my ($self, $lines, $doc) = @_;
+ my ($self, $txt, $doc) = @_;
my %seen;
my $in_diff;
my @xnq;
my $xnq = \@xnq;
- foreach (@$lines) {
+ foreach (split(/\n/, $txt)) {
if ($in_diff && s/^ //) { # diff context
index_diff_inc($self, $_, 'XDFCTX', $xnq);
} elsif (/^-- $/) { # email signature begins
}
# index_body: index one section of message body text.
#
# With this change the section arrives as a single string ($txt); the
# removed lines took an array ref ($lines), joined it with "\n", and
# emptied the caller's array before returning.
#
# $doc selects the indexing path:
#   - true:  if any line of $txt starts with "diff ", "--- " or "+++ ",
#            the text is passed to index_diff() together with $doc;
#            otherwise it is passed to index_text() with a wdf increment
#            of 1 and the 'XNQ' prefix.
#   - false: the text is passed to index_text() with a wdf increment of 0
#            and the 'XQUOT' prefix.
sub index_body ($$$) {
- my ($self, $lines, $doc) = @_;
- my $txt = join("\n", @$lines);
+ my ($self, $txt, $doc) = @_;
if ($doc) {
# does it look like a diff?
if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
- $txt = undef;
- index_diff($self, $txt, $doc);
+ index_diff($self, $txt, $doc);
} else {
index_text($self, $txt, 1, 'XNQ');
}
} else {
index_text($self, $txt, 0, 'XQUOT');
}
- @$lines = ();
}
# index_xapian: msg_iter callback.  The lines between the sub header and
# the msg_part_text() call are not visible in this excerpt, so where
# $self, $part, $ct and $doc come from is not shown here.
#
# The part text $s is obtained from msg_part_text(); the sub returns
# early when it is undefined.
#
# The removed lines walked the text line by line, buffering unquoted lines
# in @orig and quoted lines (starting with ">") in @quot, and flushed each
# buffer through index_body() whenever the other kind of line appeared.
#
# The added lines split $s once, with a capturing pattern that matches
# runs of consecutive newline-terminated lines beginning with ">", so the
# resulting list holds both the quoted runs and the text between them.
# $part and $s are then set to undef, and each section is passed to
# index_body() with 0 when it begins with ">" and with $doc otherwise.
#
# NOTE(review): because the separator is captured, text that begins with a
# quoted run yields an empty first section, which is passed on with $doc.
# NOTE(review): the pattern requires a trailing "\n" on every quoted line,
# so a final ">" line without a newline is not split off and stays
# attached to whatever text precedes it -- confirm this is acceptable.
sub index_xapian { # msg_iter callback
my ($s, undef) = msg_part_text($part, $ct);
defined $s or return;
- my (@orig, @quot);
- my @lines = split(/\n/, $s);
- while (defined(my $l = shift @lines)) {
- if ($l =~ /^>/) {
- index_body($self, \@orig, $doc) if @orig;
- push @quot, $l;
- } else {
- index_body($self, \@quot, 0) if @quot;
- push @orig, $l;
- }
- }
- index_body($self, \@quot, 0) if @quot;
- index_body($self, \@orig, $doc) if @orig;
+ # split off quoted and unquoted blocks:
+ my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s);
+ $part = $s = undef;
+ index_body($self, $_, /\A>/ ? 0 : $doc) for @sections;
}
sub add_xapian ($$$$$$) {
batch_do($self, 'Q' . $mid, sub {
my ($ids) = @_;
$db->delete_document($_) for @$ids;
- $nr = scalar @$ids;
+ $nr += scalar @$ids;
});
};
if ($@) {