#
# Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
# with the web and NNTP interfaces. This index maintains thread
-# relationships for use by Mail::Thread. This writes to the search
-# index.
+# relationships for use by PublicInbox::SearchThread.
+# This writes to the search index.
package PublicInbox::SearchIdx;
use strict;
use warnings;
use Fcntl qw(:flock :DEFAULT);
-use Email::MIME;
+use PublicInbox::MIME;
use Email::MIME::ContentType;
$Email::MIME::ContentType::STRICT_PARAMS = 0;
use base qw(PublicInbox::Search);
if ($smsg) {
# convert a ghost to a regular message
# it will also clobber any existing regular message
- $doc_id = $smsg->doc_id;
+ $doc_id = $smsg->{doc_id};
$old_tid = $smsg->thread_id;
}
$smsg = PublicInbox::SearchMsg->new($mime);
msg_iter($mime, sub {
my ($part, $depth, @idx) = @{$_[0]};
my $ct = $part->content_type || 'text/plain';
+ my $fn = $part->filename;
+ if (defined $fn && $fn ne '') {
+ $tg->index_text($fn, 1, 'XFN');
+ }
return if $ct =~ m!\btext/x?html\b!i;
my ($self, $smsg, $old_tid) = @_;
my $doc = $smsg->{doc};
my $mid = $smsg->mid;
- my $mime = $smsg->mime;
+ my $mime = $smsg->{mime};
my $hdr = $mime->header_obj;
my $refs = $hdr->header_raw('References');
- my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
- if (my $irt = $hdr->header_raw('In-Reply-To')) {
- # last References should be $irt
- # we will de-dupe later
- push @refs, mid_clean($irt);
+ my @refs = defined $refs ? ($refs =~ /<([^>]+)>/g) : ();
+ my $irt = $hdr->header_raw('In-Reply-To');
+ if (defined $irt) {
+ if ($irt eq '') {
+ $irt = undef;
+ } else {
+ $irt = mid_clean($irt);
+ $irt = undef if $mid eq $irt;
+ }
}
my $tid;
my @orig_refs = @refs;
@refs = ();
+ if (defined $irt) {
+ # add IRT so the loop below applies its MAX_MID_SIZE length test to it too
+ push @orig_refs, $irt;
+
+ # below, we will ensure IRT (if specified)
+ # is the last References
+ $uniq{$irt} = 1;
+ }
+
# prevent circular references via References: here:
foreach my $ref (@orig_refs) {
if (length($ref) > MAX_MID_SIZE) {
push @refs, $ref;
}
}
+
+ # last References should be IRT, but some mail clients do things
+ # out of order, so trust IRT over References iff IRT exists
+ push @refs, $irt if defined $irt;
+
if (@refs) {
$smsg->{references} = '<'.join('> <', @refs).'>';
my $str = $git->cat_file($blob, $sizeref);
# fixup bugs from import:
$$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
- Email::MIME->new($str);
+ PublicInbox::MIME->new($str);
};
$@ ? undef : $mime;
}