From: Eric Wong Date: Sat, 10 Dec 2016 01:09:46 +0000 (+0000) Subject: search: favor In-Reply-To over last References iff IRT exists X-Git-Tag: v1.0.0~150 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=86c018672f6bf9739a76489c8870c151d338fc15 search: favor In-Reply-To over last References iff IRT exists Some email clients set the References headers backwards, so trust the In-Reply-To header if (and only if) it exists and is parseable as the direct parent of the current message. For affected repos, this will require reindexing (via "public-inbox-index --reindex"), but there will be no version bump for this bugfix. --- diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 4aac0281..832d1cbf 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -293,10 +293,10 @@ sub link_message { my $hdr = $mime->header_obj; my $refs = $hdr->header_raw('References'); my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : (); - if (my $irt = $hdr->header_raw('In-Reply-To')) { - # last References should be $irt - # we will de-dupe later - push @refs, mid_clean($irt); + my $irt = $hdr->header_raw('In-Reply-To'); + if (defined $irt) { + $irt = mid_clean($irt); + $irt = undef if $mid eq $irt; } my $tid; @@ -305,6 +305,15 @@ sub link_message { my @orig_refs = @refs; @refs = (); + if (defined $irt) { + # to check MAX_MID_SIZE + push @orig_refs, $irt; + + # below, we will ensure IRT (if specified) + # is the last References + $uniq{$irt} = 1; + } + # prevent circular references via References: here: foreach my $ref (@orig_refs) { if (length($ref) > MAX_MID_SIZE) { @@ -315,6 +324,11 @@ sub link_message { push @refs, $ref; } } + + # last References should be IRT, but some mail clients do things + # out of order, so trust IRT over References iff IRT exists + push @refs, $irt if defined $irt; + if (@refs) { $smsg->{references} = '<'.join('> <', @refs).'>';