Sergey Matveev's repositories - public-inbox.git/blobdiff - lib/PublicInbox/SearchIdx.pm
handle "multipart/mixed" messages which are not multipart
[public-inbox.git] / lib / PublicInbox / SearchIdx.pm
index bb60506ce1a9996e1d6a9971e61459cf13b250b7..76f3f33a4de5b9fa6925101c20d95ac870da9529 100644 (file)
@@ -27,6 +27,8 @@ use constant {
        DEBUG => !!$ENV{DEBUG},
 };
 
+my $xapianlevels = qr/\A(?:full|medium)\z/;
+
 my %GIT_ESC = (
        a => "\a",
        b => "\b",
@@ -303,24 +305,11 @@ sub add_xapian ($$$$$) {
                        $self->index_text($fn, 1, 'XFN');
                }
 
-               return if $ct =~ m!\btext/x?html\b!i;
-
-               my $s = eval { $part->body_str };
-               if ($@) {
-                       if ($ct =~ m!\btext/plain\b!i) {
-                               # Try to assume UTF-8 because Alpine
-                               # seems to do wacky things and set
-                               # charset=X-UNKNOWN
-                               $part->charset_set('UTF-8');
-                               $s = eval { $part->body_str };
-                               $s = $part->body if $@;
-                       }
-               }
+               my ($s, undef) = msg_part_text($part, $ct);
                defined $s or return;
 
                my (@orig, @quot);
-               my $body = $part->body;
-               my @lines = split(/\n/, $body);
+               my @lines = split(/\n/, $s);
                while (defined(my $l = shift @lines)) {
                        if ($l =~ /^>/) {
                                $self->index_body(\@orig, $doc) if @orig;
@@ -365,7 +354,6 @@ sub add_xapian ($$$$$) {
 sub add_message {
        # mime = Email::MIME object
        my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
-       my $xapianlevels = qr/\A(?:full|medium)\z/;
        my $mids = mids($mime->header_obj);
        $mid0 = $mids->[0] unless defined $mid0; # v1 compatibility
        unless (defined $num) { # v1
@@ -582,7 +570,8 @@ sub read_log {
                        my $blob = $1;
                        if (delete $D{$blob}) {
                                if (defined $self->{regen_down}) {
-                                       $self->{regen_down}--;
+                                       my $num = $self->{regen_down}--;
+                                       $self->{mm}->num_highwater($num);
                                }
                                next;
                        }
@@ -618,23 +607,28 @@ sub _git_log {
        my ($self, $range) = @_;
        my $git = $self->{git};
 
+       # Count the new files so they can be added newest to oldest
+       # and still have numbers increasing from oldest to newest
+       my $fcount = 0;
+       # can't use 'rev-list --count' if we use --diff-filter
+       my $fh = $git->popen(qw(log --pretty=tformat:%h
+                            --no-notes --no-color --no-renames
+                            --diff-filter=AM), $range);
+       ++$fcount while <$fh>;
+       my $high = $self->{mm}->num_highwater;
+
        if (index($range, '..') < 0) {
-               my $regen_max = 0;
-               # can't use 'rev-list --count' if we use --diff-filter
-               my $fh = $git->popen(qw(log --pretty=tformat:%h
-                               --no-notes --no-color --no-renames
-                               --diff-filter=AM), $range);
-               ++$regen_max while <$fh>;
-               my (undef, $max) = $self->{mm}->minmax;
-
-               if ($max && $max == $regen_max) {
+               if ($high && $high == $fcount) {
                        # fix up old bugs in full indexes which caused messages to
                        # not appear in Msgmap
-                       $self->{regen_up} = $max;
+                       $self->{regen_up} = $high;
                } else {
                        # normal regen is for fresh data
-                       $self->{regen_down} = $regen_max;
+                       $self->{regen_down} = $fcount;
                }
+       } else {
+               # Give oldest messages the smallest numbers
+               $self->{regen_down} = $high + $fcount;
        }
 
        $git->popen(qw/log --no-notes --no-color --no-renames
@@ -714,7 +708,7 @@ sub _index_sync {
                        }
                        $dbh->commit;
                }
-               if ($mkey && $newest) {
+               if ($mkey && $newest && $self->{indexlevel} =~ $xapianlevels) {
                        my $cur = $xdb->get_metadata($mkey);
                        if (need_update($self, $cur, $newest)) {
                                $xdb->set_metadata($mkey, $newest);