-# Copyright (C) 2018-2019 all contributors <meta@public-inbox.org>
+# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Various date/time-related functions
use warnings;
use base qw(Exporter);
our @EXPORT_OK = qw(msg_timestamp msg_datestamp);
-use Date::Parse qw(str2time strptime);
+use Time::Local qw(timegm);
+my @MoY = qw(january february march april may june
+ july august september october november december);
+my %MoY;
+@MoY{@MoY} = (0..11);
+@MoY{map { substr($_, 0, 3) } @MoY} = (0..11);
+
+my %OBSOLETE_TZ = ( # RFC2822 4.3 (Obsolete Date and Time)
+ EST => '-0500', EDT => '-0400',
+ CST => '-0600', CDT => '-0500',
+ MST => '-0700', MDT => '-0600',
+ PST => '-0800', PDT => '-0700',
+ UT => '+0000', GMT => '+0000', Z => '+0000',
+
+ # RFC2822 states:
+ # The 1 character military time zones were defined in a non-standard
+ # way in [RFC822] and are therefore unpredictable in their meaning.
+);
+my $OBSOLETE_TZ = join('|', keys %OBSOLETE_TZ);
sub str2date_zone ($) {
my ($date) = @_;
+ my ($ts, $zone);
+
+ # RFC822 is most likely for email, but we can tolerate an extra comma
+ # or punctuation as long as all the data is there.
+ # We'll use '\s' since Unicode spaces won't affect our parsing.
+ # SpamAssassin ignores commas and redundant spaces, too.
+ if ($date =~ /(?:[A-Za-z]+,?\s+)? # day-of-week
+ ([0-9]+),?\s+ # dd
+ ([A-Za-z]+)\s+ # mon
+ ([0-9]{2,4})\s+ # YYYY or YY (or YYY :P)
+ ([0-9]+)[:\.] # HH:
+ ((?:[0-9]{2})|(?:\s?[0-9])) # MM
+ (?:[:\.]((?:[0-9]{2})|(?:\s?[0-9])))? # :SS
+ \s+ # a TZ offset is required:
+ ([\+\-])? # TZ sign
+ [\+\-]* # I've seen extra "-" e.g. "--500"
+ ([0-9]+|$OBSOLETE_TZ)(?:\s|$) # TZ offset
+ /xo) {
+ my ($dd, $m, $yyyy, $hh, $mm, $ss, $sign, $tz) =
+ ($1, $2, $3, $4, $5, $6, $7, $8);
+ # don't accept non-English months
+ defined(my $mon = $MoY{lc($m)}) or return;
+
+ if (defined(my $off = $OBSOLETE_TZ{$tz})) {
+ $sign = substr($off, 0, 1);
+ $tz = substr($off, 1);
+ }
+
+ # Y2K problems: 3-digit years, follow RFC2822
+ if (length($yyyy) <= 3) {
+ $yyyy += 1900;
+
+ # and 2-digit years from '09 (2009) (0..49)
+ $yyyy += 100 if $yyyy < 1950;
+ }
- my $ts = str2time($date);
- return undef unless(defined $ts);
+ $ts = timegm($ss // 0, $mm, $hh, $dd, $mon, $yyyy);
- # off is the time zone offset in seconds from GMT
- my ($ss,$mm,$hh,$day,$month,$year,$off) = strptime($date);
- return undef unless(defined $off);
+ # 4-digit dates in non-spam from 1900s and 1910s exist in
+ # lore archives
+ return if $ts < 0;
- # Compute the time zone from offset
- my $sign = ($off < 0) ? '-' : '+';
- my $hour = abs(int($off / 3600));
- my $min = ($off / 60) % 60;
- my $zone = sprintf('%s%02d%02d', $sign, $hour, $min);
+ # Compute the time offset from [+-]HHMM
+ $tz //= 0;
+ my ($tz_hh, $tz_mm);
+ if (length($tz) == 1) {
+ $tz_hh = $tz;
+ $tz_mm = 0;
+ } elsif (length($tz) == 2) {
+ $tz_hh = 0;
+ $tz_mm = $tz;
+ } else {
+ $tz_hh = $tz;
+ $tz_hh =~ s/([0-9]{2})\z//;
+ $tz_mm = $1;
+ }
+ while ($tz_mm >= 60) {
+ $tz_mm -= 60;
+ $tz_hh += 1;
+ }
+ $sign //= '+';
+ my $off = $sign . ($tz_mm * 60 + ($tz_hh * 60 * 60));
+ $ts -= $off;
+ $sign = '+' if $off == 0;
+ $zone = sprintf('%s%02d%02d', $sign, $tz_hh, $tz_mm);
+
+ # Time::Zone and Date::Parse are part of the same distribution,
+ # and we need Time::Zone to deal with tz names like "EDT"
+ } elsif (eval { require Date::Parse }) {
+ $ts = Date::Parse::str2time($date);
+ return undef unless(defined $ts);
+
+ # off is the time zone offset in seconds from GMT
+ my ($ss,$mm,$hh,$day,$month,$year,$off) =
+ Date::Parse::strptime($date);
+ return unless defined($year);
+ $off //= 0;
+
+ # Compute the time zone from offset
+ my $sign = ($off < 0) ? '-' : '+';
+ my $hour = abs(int($off / 3600));
+ my $min = ($off / 60) % 60;
+
+ # deal with weird offsets like '-0420' properly
+ $min = 60 - $min if ($min && $off < 0);
+
+ $zone = sprintf('%s%02d%02d', $sign, $hour, $min);
+ } else {
+ warn "Date::Parse missing for non-RFC822 date: $date\n";
+ return undef;
+ }
+ # Note: we've already applied the offset to $ts at this point,
+ # but we want to keep "git fsck" happy.
# "-1200" is the furthest westermost zone offset,
# but git fast-import is liberal so we use "-1400"
if ($zone >= 1400 || $zone <= -1400) {
}
sub msg_received_at ($) {
- my ($hdr) = @_; # Email::MIME::Header
+ my ($hdr) = @_; # PublicInbox::Eml
my @recvd = $hdr->header_raw('Received');
my ($ts);
foreach my $r (@recvd) {
}
sub msg_date_only ($) {
- my ($hdr) = @_; # Email::MIME::Header
+ my ($hdr) = @_; # PublicInbox::Eml
my @date = $hdr->header_raw('Date');
my ($ts);
foreach my $d (@date) {
- # Y2K problems: 3-digit years
- $d =~ s!([A-Za-z]{3}) ([0-9]{3}) ([0-9]{2}:[0-9]{2}:[0-9]{2})!
- my $yyyy = $2 + 1900; "$1 $yyyy $3"!e;
$ts = eval { str2date_zone($d) } and return $ts;
if ($@) {
my $mid = $hdr->header_raw('Message-ID');
}
# Favors Received header for sorting globally
-sub msg_timestamp ($) {
- my ($hdr) = @_; # Email::MIME::Header
+sub msg_timestamp ($;$) {
+ my ($hdr, $fallback) = @_; # PublicInbox::Eml
my $ret;
$ret = msg_received_at($hdr) and return time_response($ret);
$ret = msg_date_only($hdr) and return time_response($ret);
- wantarray ? (time, '+0000') : time;
+ time_response([ $fallback // time, '+0000' ]);
}
# Favors the Date: header for display and sorting within a thread
-sub msg_datestamp ($) {
- my ($hdr) = @_; # Email::MIME::Header
+sub msg_datestamp ($;$) {
+ my ($hdr, $fallback) = @_; # PublicInbox::Eml
my $ret;
$ret = msg_date_only($hdr) and return time_response($ret);
$ret = msg_received_at($hdr) and return time_response($ret);
- wantarray ? (time, '+0000') : time;
+ time_response([ $fallback // time, '+0000' ]);
}
1;