X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSmsg.pm;h=260ce6bb065e930adfd9030d50f5a06359e809ce;hb=4eee5af6011cc8cdefb66c9729952c7eff5c0b0b;hp=a7ee2e409391d4729d800b45c31d20563ff29fe6;hpb=dee9a7231ca1019f56eea3596f0c428769e3eac1;p=public-inbox.git

diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index a7ee2e40..260ce6bb 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
+# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # A small/skeleton/slim representation of a message.
@@ -9,26 +9,21 @@
 # large threads in our WWW UI and the NNTP range responses.
 package PublicInbox::Smsg;
 use strict;
-use warnings;
-use base qw(Exporter);
+use v5.10.1;
+use parent qw(Exporter);
 our @EXPORT_OK = qw(subject_normalized);
-use PublicInbox::MID qw(mids);
+use PublicInbox::MID qw(mids references);
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-use Time::Local qw(timegm);
 
-sub get_val ($$) {
-	my ($doc, $col) = @_;
-	# sortable_unserialise is defined by PublicInbox::Search::load_xapian()
-	sortable_unserialise($doc->get_value($col));
-}
+sub oidbin { pack('H*', $_[0]->{blob}) }
 
 sub to_doc_data {
 	my ($self) = @_;
 	join("\n",
 		$self->{subject},
 		$self->{from},
-		$self->references,
+		$self->{references} // '',
 		$self->{to},
 		$self->{cc},
 		$self->{blob},
@@ -40,6 +35,7 @@ sub to_doc_data {
 
 sub load_from_data ($$) {
 	my ($self) = $_[0]; # data = $_[1]
+	utf8::decode($_[1]);
 	(
 		$self->{subject},
 		$self->{from},
@@ -60,44 +56,41 @@ sub load_from_data ($$) {
 	) = split(/\n/, $_[1]);
 }
 
-sub load_expand {
-	my ($self, $doc) = @_;
-	my $data = $doc->get_data or return;
-	$self->{ts} = get_val($doc, PublicInbox::Search::TS());
-	my $dt = get_val($doc, PublicInbox::Search::DT());
-	my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt);
-	$self->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
-	utf8::decode($data);
-	load_from_data($self, $data);
-	$self;
-}
-
 sub psgi_cull ($) {
 	my ($self) = @_;
-	from_name($self); # fill in {from_name} so we can delete {from}
 
 	# drop NNTP-only fields which aren't relevant to PSGI results:
 	# saves ~80K on a 200 item search result:
-	delete @$self{qw(from ts to cc bytes lines)};
+	# TODO: we may need to keep some of these for JMAP...
+	my ($f) = delete @$self{qw(from tid to cc bytes lines)};
+	# ghosts don't have ->{from}
+	$self->{from_name} = join(', ', PublicInbox::Address::names($f // ''));
 	$self;
 }
 
-# Only called by PSGI interface, not NNTP
-sub from_mitem {
-	my ($mitem, $srch) = @_;
-	return $srch->retry_reopen(\&from_mitem, $mitem) if $srch;
-	my $self = bless {}, __PACKAGE__;
-	psgi_cull(load_expand($self, $mitem->get_document));
-}
-
-sub __hdr ($$) {
-	my ($self, $field) = @_;
-	$self->{lc($field)};
+sub parse_references ($$$) {
+	my ($smsg, $hdr, $mids) = @_;
+	my $refs = references($hdr);
+	push(@$refs, @$mids) if scalar(@$mids) > 1;
+	return $refs if scalar(@$refs) == 0;
+
+	# prevent circular references here:
+	my %seen = ( ($smsg->{mid} // '') => 1 );
+	my @keep;
+	foreach my $ref (@$refs) {
+		if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
+			warn "References: <$ref> too long, ignoring\n";
+			next;
+		}
+		$seen{$ref} //= push(@keep, $ref);
+	}
+	$smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
+	\@keep;
 }
 
-# for Import and v1 non-SQLite WWW code paths
+# used for v2, Import and v1 non-SQLite WWW code paths
 sub populate {
-	my ($self, $hdr, $v2w) = @_;
+	my ($self, $hdr, $sync) = @_;
 	for my $f (qw(From To Cc Subject)) {
 		my @all = $hdr->header($f);
 		my $val = join(', ', @all);
@@ -106,6 +99,9 @@ sub populate {
 		# to protect git and NNTP clients
 		$val =~ tr/\0\t\n/   /;
 
+		# rare: in case headers have wide chars (not RFC2047-encoded)
+		utf8::decode($val);
+
 		# lower-case fields for read-only stuff
 		$self->{lc($f)} = $val;
 
@@ -118,62 +114,38 @@ sub populate {
 		}
 		$self->{$f} = $val if $val ne '';
 	}
-	$v2w //= {};
-	$self->{-ds} = [ my @ds = msg_datestamp($hdr, $v2w->{autime}) ];
-	$self->{-ts} = [ my @ts = msg_timestamp($hdr, $v2w->{cotime}) ];
+	$sync //= {};
+	$self->{-ds} = [ my @ds = msg_datestamp($hdr, $sync->{autime}) ];
+	$self->{-ts} = [ my @ts = msg_timestamp($hdr, $sync->{cotime}) ];
 	$self->{ds} //= $ds[0]; # no zone
 	$self->{ts} //= $ts[0];
-
-	# for v1 users w/o SQLite
-	$self->{mid} //= eval { mids($hdr)->[0] } // '';
+	$self->{mid} //= mids($hdr)->[0];
 }
 
-sub subject ($) { __hdr($_[0], 'Subject') }
-sub to ($) { __hdr($_[0], 'To') }
-sub cc ($) { __hdr($_[0], 'Cc') }
-
 # no strftime, that is locale-dependent and not for RFC822
 my @DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
 my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
 
-sub date ($) {
+sub date ($) { # for NNTP
 	my ($self) = @_;
 	my $ds = $self->{ds};
 	return unless defined $ds;
 	my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($ds);
 	"$DoW[$wday], " . sprintf("%02d $MoY[$mon] %04d %02d:%02d:%02d +0000",
 				$mday, $year+1900, $hour, $min, $sec);
-
 }
 
-sub from ($) {
+sub internaldate { # for IMAP
 	my ($self) = @_;
-	my $from = __hdr($self, 'From');
-	if (defined $from && !defined $self->{from_name}) {
-		my @n = PublicInbox::Address::names($from);
-		$self->{from_name} = join(', ', @n);
-	}
-	$from;
-}
-
-sub from_name {
-	my ($self) = @_;
-	my $from_name = $self->{from_name};
-	return $from_name if defined $from_name;
-	$self->from;
-	$self->{from_name};
-}
-
-sub references {
-	my ($self) = @_;
-	my $x = $self->{references};
-	defined $x ? $x : '';
+	my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($self->{ts} // 0);
+	sprintf("%02d-$MoY[$mon]-%04d %02d:%02d:%02d +0000",
+				$mday, $year+1900, $hour, $min, $sec);
 }
 
-sub mid { $_[0]->{mid} }
-
 our $REPLY_RE = qr/^re:\s+/i;
 
+# TODO: see RFC 5256 sec 2.1 "Base Subject" and evaluate compatibility
+# w/ existing indices...
 sub subject_normalized ($) {
 	my ($subj) = @_;
 	$subj =~ s/\A\s+//s; # no leading space
@@ -184,4 +156,17 @@ sub subject_normalized ($) {
 	$subj;
 }
 
+# returns the number of bytes to add if given a non-CRLF arg
+sub crlf_adjust ($) {
+	if (index($_[0], "\r\n") < 0) {
+		# common case is LF-only, every \n needs an \r;
+		# so favor a cheap tr// over an expensive m//g
+		$_[0] =~ tr/\n/\n/;
+	} else { # count number of '\n' w/o '\r', expensive:
+		scalar(my @n = ($_[0] =~ m/(?<!\r)\n/g));
+	}
+}
+
+sub set_bytes { $_[0]->{bytes} = $_[2] + crlf_adjust($_[1]) }
+
 1;