]> Sergey Matveev's repositories - public-inbox.git/blobdiff - lib/PublicInbox/SearchIdx.pm
Merge commit 'mem'
[public-inbox.git] / lib / PublicInbox / SearchIdx.pm
index 29868d991bc25ec03c00572ba21d626413fc62b6..8810fe76450dde64728d715e4e63f14b3c6bb741 100644 (file)
@@ -282,7 +282,7 @@ sub index_body ($$$) {
 sub add_xapian ($$$$$) {
        my ($self, $mime, $num, $oid, $mids, $mid0) = @_;
        my $smsg = PublicInbox::SearchMsg->new($mime);
-       my $doc = $smsg->{doc};
+       my $doc = Search::Xapian::Document->new;
        my $subj = $smsg->subject;
        add_val($doc, PublicInbox::Search::TS(), $smsg->ts);
        my @ds = gmtime($smsg->ds);
@@ -305,19 +305,7 @@ sub add_xapian ($$$$$) {
                        $self->index_text($fn, 1, 'XFN');
                }
 
-               return if $ct =~ m!\btext/x?html\b!i;
-
-               my $s = eval { $part->body_str };
-               if ($@) {
-                       if ($ct =~ m!\btext/plain\b!i) {
-                               # Try to assume UTF-8 because Alpine
-                               # seems to do wacky things and set
-                               # charset=X-UNKNOWN
-                               $part->charset_set('UTF-8');
-                               $s = eval { $part->body_str };
-                               $s = $part->body if $@;
-                       }
-               }
+               my ($s, undef) = msg_part_text($part, $ct);
                defined $s or return;
 
                my (@orig, @quot);
@@ -451,8 +439,8 @@ sub remove_by_oid {
        for (; $head != $tail; $head->inc) {
                my $docid = $head->get_docid;
                my $doc = $db->get_document($docid);
-               my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
-               $smsg->load_expand;
+               my $smsg = PublicInbox::SearchMsg->wrap($mid);
+               $smsg->load_expand($doc);
                if ($smsg->{blob} eq $oid) {
                        push(@delete, $docid);
                }
@@ -582,7 +570,8 @@ sub read_log {
                        my $blob = $1;
                        if (delete $D{$blob}) {
                                if (defined $self->{regen_down}) {
-                                       $self->{regen_down}--;
+                                       my $num = $self->{regen_down}--;
+                                       $self->{mm}->num_highwater($num);
                                }
                                next;
                        }
@@ -619,28 +608,45 @@ sub _git_log {
        my $git = $self->{git};
 
        if (index($range, '..') < 0) {
-               my $regen_max = 0;
-               # can't use 'rev-list --count' if we use --diff-filter
-               my $fh = $git->popen(qw(log --pretty=tformat:%h
-                               --no-notes --no-color --no-renames
-                               --diff-filter=AM), $range);
-               ++$regen_max while <$fh>;
-               my (undef, $max) = $self->{mm}->minmax;
-
-               if ($max && $max == $regen_max) {
+               # don't show annoying git errors to users who run -index
+               # on empty inboxes
+               $git->qx(qw(rev-parse -q --verify), "$range^0");
+               if ($?) {
+                       open my $fh, '<', '/dev/null' or
+                               die "failed to open /dev/null: $!\n";
+                       return $fh;
+               }
+       }
+
+       # Count the new files so they can be added newest to oldest
+       # and still have numbers increasing from oldest to newest
+       my $fcount = 0;
+       # can't use 'rev-list --count' if we use --diff-filter
+       my $fh = $git->popen(qw(log --pretty=tformat:%h
+                            --no-notes --no-color --no-renames
+                            --diff-filter=AM), $range);
+       ++$fcount while <$fh>;
+       my $high = $self->{mm}->num_highwater;
+
+       if (index($range, '..') < 0) {
+               if ($high && $high == $fcount) {
                        # fix up old bugs in full indexes which caused messages to
                        # not appear in Msgmap
-                       $self->{regen_up} = $max;
+                       $self->{regen_up} = $high;
                } else {
                         # normal regen is for fresh data
-                       $self->{regen_down} = $regen_max;
+                       $self->{regen_down} = $fcount;
                }
+       } else {
+               # Give oldest messages the smallest numbers
+               $self->{regen_down} = $high + $fcount;
        }
 
        $git->popen(qw/log --no-notes --no-color --no-renames
                                --raw -r --no-abbrev/, $range);
 }
 
+# --is-ancestor requires git 1.8.0+
 sub is_ancestor ($$$) {
        my ($git, $cur, $tip) = @_;
        return 0 unless $git->check($cur);