# 2 - subject_path is mid_compressed in the index, only
# 3 - message-ID is compressed if it includes '%' (hack!)
# 4 - change "Re: " normalization, avoid circular Reference ghosts
- SCHEMA_VERSION => 4,
+ # 5 - subject_path drops trailing '.'
+ # 6 - preserve References: order in document data
+ SCHEMA_VERSION => 6,
QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
# Look up a single entry of %all_pfx by key and return its value
# (undef when the key is absent). Callers in this file concatenate the
# result with a term string, e.g. xpfx('type') . 'mail'.
sub xpfx { $all_pfx{$_[0]} }
# Reverse lookup table: maps a value of %all_pfx back to its key.
our %PFX2TERM_RMAP;
+my %meta_pfx = (mid => 1, thread => 1, path => 1, type => 1);
# Populate the reverse table from every (key, value) pair of %all_pfx.
# The removed lines skipped any key present in %prob_prefix; the added
# line instead records only the keys listed in %meta_pfx.
while (my ($k, $v) = each %all_pfx) {
- next if $prob_prefix{$k};
- $PFX2TERM_RMAP{$v} = $k;
+ $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k};
}
my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
my $db = $self->{xdb};
my $doc_id;
- my $mid_orig = mid_clean($mime->header_obj->header_raw('Message-ID'));
+ my $mid_orig = mid_clean($mime->header_obj->header('Message-ID'));
my $mid = mid_compressed($mid_orig);
my $was_ghost = 0;
my $ct_msg = $mime->header('Content-Type') || 'text/plain';
my $ts = Search::Xapian::sortable_serialise($smsg->ts);
$doc->add_value(PublicInbox::Search::TS, $ts);
- $doc->set_data($smsg->to_doc_data);
-
my $tg = $self->term_generator;
$tg->set_document($doc);
if ($was_ghost) {
$doc_id = $smsg->doc_id;
$self->link_message($smsg, 0);
+ $doc->set_data($smsg->to_doc_data);
$db->replace_document($doc_id, $doc);
} else {
$self->link_message($smsg, 0);
+ $doc->set_data($smsg->to_doc_data);
$doc_id = $db->add_document($doc);
}
};
my ($self, $mid, $opts) = @_;
my $smsg = eval { $self->lookup_message($mid) };
- return { count => 0, msgs => [] } unless $smsg;
+ return { total => 0, msgs => [] } unless $smsg;
my $qp = $self->qp;
my $qtid = $qp->parse_query('thread:'.$smsg->thread_id);
my $qsub = $qp->parse_query('path:'.mid_compressed($smsg->path));
my $query = Search::Xapian::Query->new(OP_OR, $qtid, $qsub);
- $self->do_enquire($query);
+ $self->do_enquire($query, $opts);
}
# private subs below
PublicInbox::SearchMsg->load_doc($_->get_document);
} $mset->items;
- { count => $mset->get_matches_estimated, msgs => \@msgs }
+ { total => $mset->get_matches_estimated, msgs => \@msgs }
}
# read-write
my $doc = $smsg->{doc};
my $mid = mid_compressed($smsg->mid);
my $mime = $smsg->mime;
- my $refs = $mime->header_obj->header_raw('References');
+ my $refs = $mime->header_obj->header('References');
my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
- my $irt = $mime->header_obj->header_raw('In-Reply-To');
+ my $irt = $mime->header_obj->header('In-Reply-To');
if ($irt) {
- if ($irt =~ /<([^>]+)>/) {
- $irt = $1;
- }
+ $irt = mid_compressed(mid_clean($irt));
# maybe some crazies will try to make a circular reference:
if ($irt eq $mid) {
$irt = undef;
} else {
+ # last References should be $irt
+ # we will de-dupe later
push @refs, $irt;
}
}
$uniq{$ref} = 1;
push @refs, $ref;
}
- $irt = undef if (defined $irt && !$uniq{$irt});
}
if (@refs) {
- if (defined $irt) {
- $doc->add_term(xpfx('inreplyto') . $irt);
- }
+ $doc->add_term(xpfx('inreplyto') . $irt) if defined $irt;
+ $smsg->{references_sorted} = '<'.join('><', @refs).'>';
my $ref_pfx = xpfx('references');
# Load the mail stored in $blob and ask remove_message() to drop it,
# identified by its Message-ID header.
#   $git  - passed through unchanged to do_cat_mail()
#   $blob - passed through unchanged to do_cat_mail(); also used in the
#           warning text
# Returns early when do_cat_mail() yields a false value. An exception
# thrown by remove_message() is trapped and reported with warn.
sub unindex_blob {
my ($self, $git, $blob) = @_;
my $mime = do_cat_mail($git, $blob) or return;
- my $mid = $mime->header_obj->header_raw('Message-ID');
+ my $mid = $mime->header_obj->header('Message-ID');
# NOTE(review): when $mid is undefined the eval below is skipped, so the
# $@ tested by the warn may be left over from an earlier eval -- confirm
# whether a stale value can reach this point.
eval { $self->remove_message($mid) } if defined $mid;
warn "W: unindex_blob $blob: $@\n" if $@;
}
# indexes all unindexed messages
sub index_sync {
- my ($self) = @_;
+ my ($self, $head) = @_;
require PublicInbox::GitCatFile;
my $db = $self->{xdb};
my $hex = '[a-f0-9]';
my $h40 = $hex .'{40}';
my $addmsg = qr!^:000000 100644 \S+ ($h40) A\t${hex}{2}/${hex}{38}$!;
my $delmsg = qr!^:100644 000000 ($h40) \S+ D\t${hex}{2}/${hex}{38}$!;
+ $head ||= 'HEAD';
$db->begin_transaction;
eval {
my $git = PublicInbox::GitCatFile->new($self->{git_dir});
my $latest = $db->get_metadata('last_commit');
- my $range = length $latest ? "$latest..HEAD" : 'HEAD';
+ my $range = length $latest ? "$latest..$head" : $head;
$latest = undef;
# get indexed messages