X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSmsg.pm;h=b132381b4ab2c3a93fd78b1fe042d1710a2c3fe8;hb=5198c976ce8b1954f0f76a0da152cc434411f147;hp=c6ff7f52b0ac90a2aad2326f80c4b862b4cc982b;hpb=71461c67fee940b05309baa8c67bac10c8c51ac6;p=public-inbox.git

diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index c6ff7f52..b132381b 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -9,13 +9,15 @@
 # large threads in our WWW UI and the NNTP range responses.
 package PublicInbox::Smsg;
 use strict;
-use warnings;
-use base qw(Exporter);
+use v5.10.1;
+use parent qw(Exporter);
 our @EXPORT_OK = qw(subject_normalized);
-use PublicInbox::MID qw(mids);
+use PublicInbox::MID qw(mids references);
 use PublicInbox::Address;
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 
+sub oidbin { pack('H*', $_[0]->{blob}) }
+
 sub to_doc_data {
 	my ($self) = @_;
 	join("\n",
@@ -57,18 +59,35 @@ sub load_from_data ($$) {
 sub psgi_cull ($) {
 	my ($self) = @_;
 
-	# ghosts don't have ->{from}
-	my $from = delete($self->{from}) // '';
-	my @n = PublicInbox::Address::names($from);
-	$self->{from_name} = join(', ', @n);
-
 	# drop NNTP-only fields which aren't relevant to PSGI results:
 	# saves ~80K on a 200 item search result:
 	# TODO: we may need to keep some of these for JMAP...
-	delete @$self{qw(tid to cc bytes lines)};
+	my ($f) = delete @$self{qw(from tid to cc bytes lines)};
+	# ghosts don't have ->{from}
+	$self->{from_name} = join(', ', PublicInbox::Address::names($f // ''));
 	$self;
 }
 
+sub parse_references ($$$) {
+	my ($smsg, $hdr, $mids) = @_;
+	my $refs = references($hdr);
+	push(@$refs, @$mids) if scalar(@$mids) > 1;
+	return $refs if scalar(@$refs) == 0;
+
+	# prevent circular references here:
+	my %seen = ( ($smsg->{mid} // '') => 1 );
+	my @keep;
+	foreach my $ref (@$refs) {
+		if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
+			warn "References: <$ref> too long, ignoring\n";
+			next;
+		}
+		$seen{$ref} //= push(@keep, $ref);
+	}
+	$smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
+	\@keep;
+}
+
 # used for v2, Import and v1 non-SQLite WWW code paths
 sub populate {
 	my ($self, $hdr, $sync) = @_;
@@ -80,9 +99,6 @@ sub populate {
 		# to protect git and NNTP clients
 		$val =~ tr/\0\t\n/ /;
 
-		# rare: in case headers have wide chars (not RFC2047-encoded)
-		utf8::decode($val);
-
 		# lower-case fields for read-only stuff
 		$self->{lc($f)} = $val;
 
@@ -96,8 +112,10 @@ sub populate {
 		$self->{$f} = $val if $val ne '';
 	}
 	$sync //= {};
-	$self->{-ds} = [ my @ds = msg_datestamp($hdr, $sync->{autime}) ];
-	$self->{-ts} = [ my @ts = msg_timestamp($hdr, $sync->{cotime}) ];
+	my @ds = msg_datestamp($hdr, $sync->{autime} // $self->{ds});
+	my @ts = msg_timestamp($hdr, $sync->{cotime} // $self->{ts});
+	$self->{-ds} = \@ds;
+	$self->{-ts} = \@ts;
 	$self->{ds} //= $ds[0]; # no zone
 	$self->{ts} //= $ts[0];
 	$self->{mid} //= mids($hdr)->[0];
@@ -125,6 +143,8 @@ sub internaldate { # for IMAP
 
 our $REPLY_RE = qr/^re:\s+/i;
 
+# TODO: see RFC 5256 sec 2.1 "Base Subject" and evaluate compatibility
+# w/ existing indices...
 sub subject_normalized ($) {
 	my ($subj) = @_;
 	$subj =~ s/\A\s+//s; # no leading space