From: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>
Date: Tue, 6 Mar 2018 04:15:38 +0000 (+0000)
Subject: favor Received: date over Date: header globally
X-Git-Tag: v1.1.0-pre1~174
X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=35ac61764499c272d2760de2b2a432be412ecede;p=public-inbox.git

favor Received: date over Date: header globally

The first Received: header is believable since it is typically
added by the user's own mail server and can be treated as
relatively trustworthy.  We still show the Date: in per-message
(permalink) views, which may expose senders who have incorrect
Date: headers, but all the ISO YYYY-MM-DD dates we display will
consistently reflect the Received: time when one is available.
---

diff --git a/MANIFEST b/MANIFEST
index 7366aa0d..a42b9e1a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -73,6 +73,7 @@ lib/PublicInbox/MID.pm
 lib/PublicInbox/MIME.pm
 lib/PublicInbox/Mbox.pm
 lib/PublicInbox/MsgIter.pm
+lib/PublicInbox/MsgTime.pm
 lib/PublicInbox/Msgmap.pm
 lib/PublicInbox/NNTP.pm
 lib/PublicInbox/NNTPD.pm
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index ddb63b10..7ba16683 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -12,8 +12,7 @@ use PublicInbox::Spawn qw(spawn);
 use PublicInbox::MID qw(mid_mime mid2path);
 use PublicInbox::Address;
 use PublicInbox::ContentId qw(content_id);
-use Date::Parse qw(str2time);
-use Time::Zone qw(tz_offset);
+use PublicInbox::MsgTime qw(msg_timestamp);
 
 sub new {
 	my ($class, $git, $name, $email, $ibx) = @_;
@@ -204,37 +203,7 @@ sub remove {
 
 sub parse_date ($) {
 	my ($mime) = @_;
-	my $hdr = $mime->header_obj;
-	my $date = $hdr->header_raw('Date');
-	my ($ts, $zone);
-	my $mid = $hdr->header_raw('Message-ID');
-	if ($date) {
-		$ts = eval { str2time($date) };
-		if ($@) {
-			warn "bad Date: $date in $mid: $@\n";
-		} elsif ($date =~ /\s+([\+\-]\d+)\s*\z/) {
-			$zone = $1;
-		}
-	}
-	unless ($ts) {
-		my @recvd = $hdr->header_raw('Received');
-		foreach my $r (@recvd) {
-			$zone = undef;
-			$r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+
-				\d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/osx or next;
-			$zone = $2;
-			$ts = eval { str2time($1) } and last;
-			warn "no date in Received: $r\n";
-		}
-	}
-	$zone ||= '+0000';
-	# "-1200" is the furthest westermost zone offset,
-	# but git fast-import is liberal so we use "-1400"
-	if ($zone >= 1400 || $zone <= -1400) {
-		warn "bogus TZ offset: $zone, ignoring and assuming +0000\n";
-		$zone = '+0000';
-	}
-	$ts = time unless defined $ts;
+	my ($ts, $zone) = msg_timestamp($mime->header_obj);
 	$ts = 0 if $ts < 0; # git uses unsigned times
 	"$ts $zone";
 }
diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm
new file mode 100644
index 00000000..87664f4b
--- /dev/null
+++ b/lib/PublicInbox/MsgTime.pm
@@ -0,0 +1,61 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::MsgTime;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(msg_timestamp);
+use Date::Parse qw(str2time);
+use Time::Zone qw(tz_offset);
+
+# Returns the timestamp of a message given its Email::MIME::Header.
+# The first Received: header with a parseable date is preferred,
+# then the first parseable Date: header, then the current time.
+# Scalar context: epoch seconds only.
+# List context: (epoch seconds, zone offset string such as "+0000").
+sub msg_timestamp ($) {
+	my ($hdr) = @_; # Email::MIME::Header
+	my ($ts, $zone);
+	my $mid; # fetched lazily, only needed for warnings
+	foreach my $r ($hdr->header_raw('Received')) {
+		$r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+
+			\d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next;
+		my $z = $2; # only committed to $zone if the date parses
+		$ts = eval { str2time($1) };
+		if (defined $ts) { # 0 (the epoch) is a valid result
+			$zone = $z;
+			last;
+		}
+		$mid = $hdr->header_raw('Message-ID') unless defined $mid;
+		$mid = '' unless defined $mid;
+		warn "no date in $mid Received: $r\n";
+	}
+	unless (defined $ts) {
+		foreach my $d ($hdr->header_raw('Date')) {
+			$ts = eval { str2time($d) };
+			if (defined $ts) {
+				# only trust the zone of a Date: we could parse
+				$zone = $1 if $d =~ /\s+([\+\-]\d+)\s*\z/;
+				last; # do not let a later Date: override it
+			}
+			next unless $@; # str2time gave up without dying
+			$mid = $hdr->header_raw('Message-ID')
+				unless defined $mid;
+			$mid = '' unless defined $mid;
+			warn "bad Date: $d in $mid: $@\n";
+		}
+	}
+	$ts = time unless defined $ts;
+	return $ts unless wantarray;
+
+	$zone ||= '+0000';
+	# "-1200" is the furthest westermost zone offset,
+	# but git fast-import is liberal so we use "-1400"
+	if ($zone >= 1400 || $zone <= -1400) {
+		warn "bogus TZ offset: $zone, ignoring and assuming +0000\n";
+		$zone = '+0000';
+	}
+	($ts, $zone);
+}
+
+1;
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index a62a6490..23478a2a 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -7,9 +7,9 @@ package PublicInbox::SearchMsg;
 use strict;
 use warnings;
 use Search::Xapian;
-use Date::Parse qw/str2time/;
 use PublicInbox::MID qw/mid_clean mid_mime/;
 use PublicInbox::Address;
+use PublicInbox::MsgTime qw(msg_timestamp);
 
 sub new {
 	my ($class, $mime) = @_;
@@ -117,7 +117,9 @@ sub from_name {
 
 sub ts {
 	my ($self) = @_;
-	$self->{ts} ||= eval { str2time($self->{mime}->header('Date')) } || 0;
+	$self->{ts} ||= eval {
+		msg_timestamp($self->{mime}->header_obj);
+	} || 0;
 }
 
 sub to_doc_data {
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index aad67488..f811f4f0 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -6,7 +6,7 @@
 package PublicInbox::View;
 use strict;
 use warnings;
-use Date::Parse qw/str2time/;
+use PublicInbox::MsgTime qw(msg_timestamp);
 use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
 use PublicInbox::Linkify;
 use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape/;
@@ -732,12 +732,6 @@ sub load_results {
 	$srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] });
 }
 
-sub msg_timestamp {
-	my ($hdr) = @_;
-	my $ts = eval { str2time($hdr->header('Date')) };
-	defined($ts) ? $ts : 0;
-}
-
 sub thread_results {
 	my ($msgs, $srch) = @_;
 	require PublicInbox::SearchThread;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index b69de856..bb574a7c 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -7,11 +7,11 @@ use strict;
 use warnings;
 
 use POSIX qw(strftime);
-use Date::Parse qw(str2time);
 use Digest::SHA qw(sha1_hex);
 use PublicInbox::Address;
 use PublicInbox::Hval qw(ascii_html);
 use PublicInbox::MID qw/mid_clean mid_escape/;
+use PublicInbox::MsgTime qw(msg_timestamp);
 
 # called by PSGI server after getline:
 sub close {}
@@ -108,8 +108,7 @@ sub feed_entry {
 		$irt = '';
 	}
 	my $href = $base . mid_escape($mid) . '/';
-	my $date = $hdr->header('Date');
-	my $t = eval { str2time($date) } if defined $date;
+	my $t = msg_timestamp($hdr);
 	my @t = gmtime(defined $t ? $t : time);
 	my $updated = feed_updated(@t);
 
diff --git a/scripts/import_vger_from_mbox b/scripts/import_vger_from_mbox
index 8f0ec7cd..44698870 100644
--- a/scripts/import_vger_from_mbox
+++ b/scripts/import_vger_from_mbox
@@ -4,7 +4,6 @@
 use strict;
 use warnings;
 use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
-use Date::Parse qw/str2time/;
 use PublicInbox::MIME;
 use PublicInbox::Inbox;
 use PublicInbox::V2Writable;