From: Eric Wong (Contractor, The Linux Foundation) Date: Tue, 6 Mar 2018 04:15:38 +0000 (+0000) Subject: favor Received: date over Date: header globally X-Git-Tag: v1.1.0-pre1~174 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=35ac61764499c272d2760de2b2a432be412ecede favor Received: date over Date: header globally The first Received: header is believable since it typically hits the user's mail server and can be treated as relatively trustworthy. We still show the Date: in per-message (permalink) views, which may expose users for having incorrect Date: headers, but all the ISO YYYY-MM-DD dates we display will match what we see. --- diff --git a/MANIFEST b/MANIFEST index 7366aa0d..a42b9e1a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -73,6 +73,7 @@ lib/PublicInbox/MID.pm lib/PublicInbox/MIME.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MsgIter.pm +lib/PublicInbox/MsgTime.pm lib/PublicInbox/Msgmap.pm lib/PublicInbox/NNTP.pm lib/PublicInbox/NNTPD.pm diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index ddb63b10..7ba16683 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -12,8 +12,7 @@ use PublicInbox::Spawn qw(spawn); use PublicInbox::MID qw(mid_mime mid2path); use PublicInbox::Address; use PublicInbox::ContentId qw(content_id); -use Date::Parse qw(str2time); -use Time::Zone qw(tz_offset); +use PublicInbox::MsgTime qw(msg_timestamp); sub new { my ($class, $git, $name, $email, $ibx) = @_; @@ -204,37 +203,7 @@ sub remove { sub parse_date ($) { my ($mime) = @_; - my $hdr = $mime->header_obj; - my $date = $hdr->header_raw('Date'); - my ($ts, $zone); - my $mid = $hdr->header_raw('Message-ID'); - if ($date) { - $ts = eval { str2time($date) }; - if ($@) { - warn "bad Date: $date in $mid: $@\n"; - } elsif ($date =~ /\s+([\+\-]\d+)\s*\z/) { - $zone = $1; - } - } - unless ($ts) { - my @recvd = $hdr->header_raw('Received'); - foreach my $r (@recvd) { - $zone = undef; - $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+ - \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/osx or next; - $zone = $2; - $ts = eval { str2time($1) } and last; - warn "no date in Received: $r\n"; - } - } - $zone ||= '+0000'; - # "-1200" is the furthest westermost zone offset, - # but git fast-import is liberal so we use "-1400" - if ($zone >= 1400 || $zone <= -1400) { - warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; - $zone = '+0000'; - } - $ts = time unless defined $ts; + my ($ts, $zone) = msg_timestamp($mime->header_obj); $ts = 0 if $ts < 0; # git uses unsigned times "$ts $zone"; } diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm new file mode 100644 index 00000000..87664f4b --- /dev/null +++ b/lib/PublicInbox/MsgTime.pm @@ -0,0 +1,51 @@ +# Copyright (C) 2018 all contributors +# License: AGPL-3.0+ +package PublicInbox::MsgTime; +use strict; +use warnings; +use base qw(Exporter); +our @EXPORT_OK = qw(msg_timestamp); +use Date::Parse qw(str2time); +use Time::Zone qw(tz_offset); + +sub msg_timestamp ($) { + my ($hdr) = @_; # Email::MIME::Header + my ($ts, $zone); + my $mid; + my @recvd = $hdr->header_raw('Received'); + foreach my $r (@recvd) { + $zone = undef; + $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+ + \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next; + $zone = $2; + $ts = eval { str2time($1) } and last; + $mid ||= $hdr->header_raw('Message-ID'); + warn "no date in $mid Received: $r\n"; + } + unless (defined $ts) { + my @date = $hdr->header_raw('Date'); + foreach my $d (@date) { + $zone = undef; + $ts = eval { str2time($d) }; + if ($@) { + $mid ||= $hdr->header_raw('Message-ID'); + warn "bad Date: $d in $mid: $@\n"; + } elsif ($d =~ /\s+([\+\-]\d+)\s*\z/) { + $zone = $1; + } + } + } + $ts = time unless defined $ts; + return $ts unless wantarray; + + $zone ||= '+0000'; + # "-1200" is the furthest westermost zone offset, + # but git fast-import is liberal so we use "-1400" + if ($zone >= 1400 || $zone <= -1400) { + warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; + $zone = '+0000'; + } + ($ts, $zone); +} + +1; diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index a62a6490..23478a2a 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -7,9 +7,9 @@ package PublicInbox::SearchMsg; use strict; use warnings; use Search::Xapian; -use Date::Parse qw/str2time/; use PublicInbox::MID qw/mid_clean mid_mime/; use PublicInbox::Address; +use PublicInbox::MsgTime qw(msg_timestamp); sub new { my ($class, $mime) = @_; @@ -117,7 +117,9 @@ sub from_name { sub ts { my ($self) = @_; - $self->{ts} ||= eval { str2time($self->{mime}->header('Date')) } || 0; + $self->{ts} ||= eval { + msg_timestamp($self->{mime}->header_obj); + } || 0; } sub to_doc_data { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index aad67488..f811f4f0 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -6,7 +6,7 @@ package PublicInbox::View; use strict; use warnings; -use Date::Parse qw/str2time/; +use PublicInbox::MsgTime qw(msg_timestamp); use PublicInbox::Hval qw/ascii_html obfuscate_addrs/; use PublicInbox::Linkify; use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape/; @@ -732,12 +732,6 @@ sub load_results { $srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] }); } -sub msg_timestamp { - my ($hdr) = @_; - my $ts = eval { str2time($hdr->header('Date')) }; - defined($ts) ? $ts : 0; -} - sub thread_results { my ($msgs, $srch) = @_; require PublicInbox::SearchThread; diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index b69de856..bb574a7c 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -7,11 +7,11 @@ use strict; use warnings; use POSIX qw(strftime); -use Date::Parse qw(str2time); use Digest::SHA qw(sha1_hex); use PublicInbox::Address; use PublicInbox::Hval qw(ascii_html); use PublicInbox::MID qw/mid_clean mid_escape/; +use PublicInbox::MsgTime qw(msg_timestamp); # called by PSGI server after getline: sub close {} @@ -108,8 +108,7 @@ sub feed_entry { $irt = ''; } my $href = $base . mid_escape($mid) . '/'; - my $date = $hdr->header('Date'); - my $t = eval { str2time($date) } if defined $date; + my $t = msg_timestamp($hdr); my @t = gmtime(defined $t ? $t : time); my $updated = feed_updated(@t); diff --git a/scripts/import_vger_from_mbox b/scripts/import_vger_from_mbox index 8f0ec7cd..44698870 100644 --- a/scripts/import_vger_from_mbox +++ b/scripts/import_vger_from_mbox @@ -4,7 +4,6 @@ use strict; use warnings; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; -use Date::Parse qw/str2time/; use PublicInbox::MIME; use PublicInbox::Inbox; use PublicInbox::V2Writable;