DEBUG => !!$ENV{DEBUG},
};
+my $xapianlevels = qr/\A(?:full|medium)\z/;
+
my %GIT_ESC = (
a => "\a",
b => "\b",
sub add_xapian ($$$$$) {
my ($self, $mime, $num, $oid, $mids, $mid0) = @_;
my $smsg = PublicInbox::SearchMsg->new($mime);
- my $doc = $smsg->{doc};
+ my $doc = Search::Xapian::Document->new;
my $subj = $smsg->subject;
add_val($doc, PublicInbox::Search::TS(), $smsg->ts);
my @ds = gmtime($smsg->ds);
$self->index_text($fn, 1, 'XFN');
}
- return if $ct =~ m!\btext/x?html\b!i;
-
- my $s = eval { $part->body_str };
- if ($@) {
- if ($ct =~ m!\btext/plain\b!i) {
- # Try to assume UTF-8 because Alpine
- # seems to do wacky things and set
- # charset=X-UNKNOWN
- $part->charset_set('UTF-8');
- $s = eval { $part->body_str };
- $s = $part->body if $@;
- }
- }
+ my ($s, undef) = msg_part_text($part, $ct);
defined $s or return;
my (@orig, @quot);
- my $body = $part->body;
- my @lines = split(/\n/, $body);
+ my @lines = split(/\n/, $s);
while (defined(my $l = shift @lines)) {
if ($l =~ /^>/) {
$self->index_body(\@orig, $doc) if @orig;
sub add_message {
# mime = Email::MIME object
my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
- my $xapianlevels = qr/\A(?:full|medium)\z/;
my $mids = mids($mime->header_obj);
$mid0 = $mids->[0] unless defined $mid0; # v1 compatibility
unless (defined $num) { # v1
for (; $head != $tail; $head->inc) {
my $docid = $head->get_docid;
my $doc = $db->get_document($docid);
- my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
- $smsg->load_expand;
+ my $smsg = PublicInbox::SearchMsg->wrap($mid);
+ $smsg->load_expand($doc);
if ($smsg->{blob} eq $oid) {
push(@delete, $docid);
}
my $blob = $1;
if (delete $D{$blob}) {
if (defined $self->{regen_down}) {
- $self->{regen_down}--;
+ my $num = $self->{regen_down}--;
+ $self->{mm}->num_highwater($num);
}
next;
}
my $git = $self->{git};
if (index($range, '..') < 0) {
- my $regen_max = 0;
- # can't use 'rev-list --count' if we use --diff-filter
- my $fh = $git->popen(qw(log --pretty=tformat:%h
- --no-notes --no-color --no-renames
- --diff-filter=AM), $range);
- ++$regen_max while <$fh>;
- my (undef, $max) = $self->{mm}->minmax;
-
- if ($max && $max == $regen_max) {
+ # don't show annoying git errors to users who run -index
+ # on empty inboxes
+ $git->qx(qw(rev-parse -q --verify), "$range^0");
+ if ($?) {
+ open my $fh, '<', '/dev/null' or
+ die "failed to open /dev/null: $!\n";
+ return $fh;
+ }
+ }
+
+ # Count the new files so they can be added newest to oldest
+ # and still have numbers increasing from oldest to newest
+ my $fcount = 0;
+ # can't use 'rev-list --count' if we use --diff-filter
+ my $fh = $git->popen(qw(log --pretty=tformat:%h
+ --no-notes --no-color --no-renames
+ --diff-filter=AM), $range);
+ ++$fcount while <$fh>;
+ my $high = $self->{mm}->num_highwater;
+
+ if (index($range, '..') < 0) {
+ if ($high && $high == $fcount) {
# fix up old bugs in full indexes which caused messages to
# not appear in Msgmap
- $self->{regen_up} = $max;
+ $self->{regen_up} = $high;
} else {
# normal regen is for for fresh data
- $self->{regen_down} = $regen_max;
+ $self->{regen_down} = $fcount;
}
+ } else {
+ # Give oldest messages the smallest numbers
+ $self->{regen_down} = $high + $fcount;
}
$git->popen(qw/log --no-notes --no-color --no-renames
--raw -r --no-abbrev/, $range);
}
+# --is-ancestor requires git 1.8.0+
sub is_ancestor ($$$) {
my ($git, $cur, $tip) = @_;
return 0 unless $git->check($cur);
}
$dbh->commit;
}
- if ($mkey && $newest) {
+ if ($mkey && $newest && $self->{indexlevel} =~ $xapianlevels) {
my $cur = $xdb->get_metadata($mkey);
if (need_update($self, $cur, $newest)) {
$xdb->set_metadata($mkey, $newest);