From: Eric Wong <e@80x24.org>
Date: Fri, 9 Sep 2016 00:01:31 +0000 (+0000)
Subject: search: index attachment filenames
X-Git-Tag: v1.0.0~187
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=0df58f99a71268c98bb21cab0a98ddd25a5b83b2;p=public-inbox.git

search: index attachment filenames

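Attachment filenames are indexed under the XFN prefix, which is
searchable via the new "n:" prefix and included in the default search.

While we're at it, add a test to ensure that searching inside
displayable attachment bodies works.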
---

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index ceee39af..0c056772 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -69,6 +69,7 @@ my %prob_prefix = (
 	tcf => 'XTO XCC A',
 	b => 'XNQ XQUOT',
 	bs => 'XNQ XQUOT S',
+	n => 'XFN',
 
 	# n.b.: leaving out "a:" alias for "tcf:" even though
 	# mairix supports it.  It is only mentioned in passing in mairix(1)
@@ -77,7 +78,7 @@ my %prob_prefix = (
 	nq => 'XNQ',
 
 	# default:
-	'' => 'XMID S A XNQ XQUOT',
+	'' => 'XMID S A XNQ XQUOT XFN',
 );
 
 # not documenting m: and mid: for now, the using the URLs works w/o Xapian
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index fb68f4b1..23aef9f3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -181,6 +181,10 @@ sub add_message {
 		msg_iter($mime, sub {
 			my ($part, $depth, @idx) = @{$_[0]};
 			my $ct = $part->content_type || 'text/plain';
+			my $fn = $part->filename;
+			if (defined $fn && $fn ne '') {
+				$tg->index_text($fn, 1, 'XFN');
+			}
 
 			return if $ct =~ m!\btext/x?html\b!i;
 
diff --git a/t/search.t b/t/search.t
index bddb545a..cce3b9e2 100644
--- a/t/search.t
+++ b/t/search.t
@@ -386,6 +386,50 @@ sub filter_mids {
 	}
 }
 
+{ # bare block scoping the attachment filename (n:) and attachment body search checks
+	my $part1 = Email::MIME->create( # first text/plain attachment part
+                 attributes => {
+                     content_type => 'text/plain',
+                     disposition  => 'attachment',
+                     charset => 'US-ASCII',
+		     encoding => 'quoted-printable',
+		     filename => 'attached_fart.txt',
+                 },
+                 body_str => 'inside the attachment',
+	);
+	my $part2 = Email::MIME->create( # second text/plain attachment part
+                 attributes => {
+                     content_type => 'text/plain',
+                     disposition  => 'attachment',
+                     charset => 'US-ASCII',
+		     encoding => 'quoted-printable',
+		     filename => 'part_deux.txt',
+                 },
+                 body_str => 'inside another',
+	);
+	my $amsg = Email::MIME->create( # multipart message carrying both attachments
+		header_str => [
+			Subject => 'see attachment',
+			'Message-ID' => '<file@attached>',
+			From => 'John Smith <js@example.com>',
+			To => 'list@example.com',
+		],
+		parts => [ $part1, $part2 ],
+	);
+	ok($rw->add_message($amsg), 'added attachment');
+	$rw_commit->(); # NOTE(review): presumably commits pending writes; defined outside this hunk
+	$ro->reopen; # reopen $ro before issuing the queries below
+	my $n = $ro->query('n:attached_fart.txt'); # filename of $part1, using the n: prefix
+	is(scalar @{$n->{msgs}}, 1, 'got result for n:');
+	my $res = $ro->query('part_deux.txt'); # filename of $part2, no prefix given
+	is(scalar @{$res->{msgs}}, 1, 'got result without n:');
+	is($n->{msgs}->[0]->mid, $res->{msgs}->[0]->mid,
+		'same result with and without');
+	my $txt = $ro->query('"inside another"'); # quoted query matching the body_str of $part2
+	is($txt->{msgs}->[0]->mid, $res->{msgs}->[0]->mid,
+		'search inside text attachments works');
+}
+
 done_testing();
 
 1;