Sergey Matveev's repositories - public-inbox.git/blobdiff - lib/PublicInbox/SearchIdx.pm
handle "multipart/mixed" messages which are not multipart
[public-inbox.git] / lib / PublicInbox / SearchIdx.pm
index 29868d991bc25ec03c00572ba21d626413fc62b6..76f3f33a4de5b9fa6925101c20d95ac870da9529 100644 (file)
@@ -305,19 +305,7 @@ sub add_xapian ($$$$$) {
                        $self->index_text($fn, 1, 'XFN');
                }
 
-               return if $ct =~ m!\btext/x?html\b!i;
-
-               my $s = eval { $part->body_str };
-               if ($@) {
-                       if ($ct =~ m!\btext/plain\b!i) {
-                               # Try to assume UTF-8 because Alpine
-                               # seems to do wacky things and set
-                               # charset=X-UNKNOWN
-                               $part->charset_set('UTF-8');
-                               $s = eval { $part->body_str };
-                               $s = $part->body if $@;
-                       }
-               }
+               my ($s, undef) = msg_part_text($part, $ct);
                defined $s or return;
 
                my (@orig, @quot);
@@ -582,7 +570,8 @@ sub read_log {
                        my $blob = $1;
                        if (delete $D{$blob}) {
                                if (defined $self->{regen_down}) {
-                                       $self->{regen_down}--;
+                                       my $num = $self->{regen_down}--;
+                                       $self->{mm}->num_highwater($num);
                                }
                                next;
                        }
@@ -618,23 +607,28 @@ sub _git_log {
        my ($self, $range) = @_;
        my $git = $self->{git};
 
+       # Count the new files so they can be added newest to oldest
+       # and still have numbers increasing from oldest to newest
+       my $fcount = 0;
+       # can't use 'rev-list --count' if we use --diff-filter
+       my $fh = $git->popen(qw(log --pretty=tformat:%h
+                            --no-notes --no-color --no-renames
+                            --diff-filter=AM), $range);
+       ++$fcount while <$fh>;
+       my $high = $self->{mm}->num_highwater;
+
        if (index($range, '..') < 0) {
-               my $regen_max = 0;
-               # can't use 'rev-list --count' if we use --diff-filter
-               my $fh = $git->popen(qw(log --pretty=tformat:%h
-                               --no-notes --no-color --no-renames
-                               --diff-filter=AM), $range);
-               ++$regen_max while <$fh>;
-               my (undef, $max) = $self->{mm}->minmax;
-
-               if ($max && $max == $regen_max) {
+               if ($high && $high == $fcount) {
                        # fix up old bugs in full indexes which caused messages to
                        # not appear in Msgmap
-                       $self->{regen_up} = $max;
+                       $self->{regen_up} = $high;
                } else {
                        # normal regen is for fresh data
-                       $self->{regen_down} = $regen_max;
+                       $self->{regen_down} = $fcount;
                }
+       } else {
+               # Give oldest messages the smallest numbers
+               $self->{regen_down} = $high + $fcount;
        }
 
        $git->popen(qw/log --no-notes --no-color --no-renames