X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearchIdx.pm;h=1142ca7a0f4c83a6aa4103edc79d91bd04944e5a;hb=6e83825a9e49ca68694c20ddfed54368d5f3e075;hp=fb68f4b12a66878dbe53108194fbea4396e6ce2c;hpb=0cf6196025d4e4880cd1ed859257ce21dd3cdcf6;p=public-inbox.git diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index fb68f4b1..1142ca7a 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -4,13 +4,13 @@ # # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use # with the web and NNTP interfaces. This index maintains thread -# relationships for use by Mail::Thread. This writes to the search -# index. +# relationships for use by PublicInbox::SearchThread. +# This writes to the search index. package PublicInbox::SearchIdx; use strict; use warnings; use Fcntl qw(:flock :DEFAULT); -use Email::MIME; +use PublicInbox::MIME; use Email::MIME::ContentType; $Email::MIME::ContentType::STRICT_PARAMS = 0; use base qw(PublicInbox::Search); @@ -155,7 +155,7 @@ sub add_message { if ($smsg) { # convert a ghost to a regular message # it will also clobber any existing regular message - $doc_id = $smsg->doc_id; + $doc_id = $smsg->{doc_id}; $old_tid = $smsg->thread_id; } $smsg = PublicInbox::SearchMsg->new($mime); @@ -181,6 +181,10 @@ sub add_message { msg_iter($mime, sub { my ($part, $depth, @idx) = @{$_[0]}; my $ct = $part->content_type || 'text/plain'; + my $fn = $part->filename; + if (defined $fn && $fn ne '') { + $tg->index_text($fn, 1, 'XFN'); + } return if $ct =~ m!\btext/x?html\b!i; @@ -285,14 +289,18 @@ sub link_message { my ($self, $smsg, $old_tid) = @_; my $doc = $smsg->{doc}; my $mid = $smsg->mid; - my $mime = $smsg->mime; + my $mime = $smsg->{mime}; my $hdr = $mime->header_obj; my $refs = $hdr->header_raw('References'); - my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : (); - if (my $irt = $hdr->header_raw('In-Reply-To')) { - # last References should be $irt - # we will de-dupe later - push @refs, mid_clean($irt); + my @refs = defined $refs ? ($refs =~ /<([^>]+)>/g) : (); + my $irt = $hdr->header_raw('In-Reply-To'); + if (defined $irt) { + if ($irt eq '') { + $irt = undef; + } else { + $irt = mid_clean($irt); + $irt = undef if $mid eq $irt; + } } my $tid; @@ -301,6 +309,15 @@ sub link_message { my @orig_refs = @refs; @refs = (); + if (defined $irt) { + # to check MAX_MID_SIZE + push @orig_refs, $irt; + + # below, we will ensure IRT (if specified) + # is the last References + $uniq{$irt} = 1; + } + # prevent circular references via References: here: foreach my $ref (@orig_refs) { if (length($ref) > MAX_MID_SIZE) { @@ -311,6 +328,11 @@ sub link_message { push @refs, $ref; } } + + # last References should be IRT, but some mail clients do things + # out of order, so trust IRT over References iff IRT exists + push @refs, $irt if defined $irt; + if (@refs) { $smsg->{references} = '<'.join('> <', @refs).'>'; @@ -382,7 +404,7 @@ sub do_cat_mail { my $str = $git->cat_file($blob, $sizeref); # fixup bugs from import: $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - Email::MIME->new($str); + PublicInbox::MIME->new($str); }; $@ ? undef : $mime; }