lib/PublicInbox/Eml.pm | 8 +++++--- lib/PublicInbox/IMAP.pm | 2 ++ lib/PublicInbox/Smsg.pm | 3 --- t/imapd.t | 28 ++++++++++++++++++++++++++++ t/psgi_search.t | 7 ++++++- diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index 485f637a3e7b41a9117cb37a9acfd8e7649dadfa..8b999e1a88ef8d11b78e7c5bd0c022dbd7d0f7be 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # # Lazy MIME parser, it still slurps the full message but keeps short @@ -144,6 +144,7 @@ sub header_raw { my $re = re_memo($_[1]); my @v = (${ $_[0]->{hdr} } =~ /$re/g); for (@v) { + utf8::decode($_); # SMTPUTF8 # for compatibility w/ Email::Simple::Header, s/\s+\z//s; s/\A\s+//s; @@ -359,14 +360,15 @@ return $$hdr =~ s!$re!!g if !@vals; $pfx .= ': '; my $len = 78 - length($pfx); @vals = map {; + utf8::encode(my $v = $_); # to bytes, support SMTPUTF8 # folding differs from Email::Simple::Header, # we favor tabs for visibility (and space savings :P) if (length($_) >= $len && (/\n[^ \t]/s || !/\n/s)) { local $Text::Wrap::columns = $len; local $Text::Wrap::huge = 'overflow'; - $pfx . wrap('', "\t", $_) . $self->{crlf}; + $pfx . wrap('', "\t", $v) . $self->{crlf}; } else { - $pfx . $_ . $self->{crlf}; + $pfx . $v . $self->{crlf}; } } @vals; $$hdr =~ s!$re!shift(@vals) // ''!ge; # replace current headers, first diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm index 1f65aa650e2954e2a43b8549fabf33c829c00f41..37317948490234f0c9ce2dc75f5bacc4541501ea 100644 --- a/lib/PublicInbox/IMAP.pm +++ b/lib/PublicInbox/IMAP.pm @@ -426,8 +426,10 @@ my ($v) = @_; if (!defined($v)) { 'NIL'; } elsif ($v =~ /[{"\r\n%*\\\[]/) { # literal string + utf8::encode($v); '{' . length($v) . "}\r\n" . $v; } else { # quoted string + utf8::encode($v); qq{"$v"} } } diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm index 2026c7d900f65c057ba4a1d8a4bea6c0c9ca400e..b132381b4ab2c3a93fd78b1fe042d1710a2c3fe8 100644 --- a/lib/PublicInbox/Smsg.pm +++ b/lib/PublicInbox/Smsg.pm @@ -99,9 +99,6 @@ # MIME decoding can create NULs, replace them with spaces # to protect git and NNTP clients $val =~ tr/\0\t\n/ /; - # rare: in case headers have wide chars (not RFC2047-encoded) - utf8::decode($val); - # lower-case fields for read-only stuff $self->{lc($f)} = $val; diff --git a/t/imapd.t b/t/imapd.t index 3c74aefd4beeeb806697e55dc503a720aecff5ef..cbd6c1b9961dec4c9c58484b108a3174c642bcb0 100644 --- a/t/imapd.t +++ b/t/imapd.t @@ -534,6 +534,34 @@ 'no backtraces from errors'); } } +{ + ok(my $ic = $imap_client->new(%mic_opt), 'logged in'); + my $mb = "$ibx[0]->{newsgroup}.$first_range"; + ok($ic->examine($mb), "EXAMINE $mb"); + my $uidnext = $ic->uidnext($mb); # we'll fetch BODYSTRUCTURE on this + my $im = $ibx[0]->importer(0); + $im->add(PublicInbox::Eml->new(< +From: Ævar Arnfjörð Bjarmason +To: git\@vger.kernel.org + +EOF + $im->done; + my $envl = $ic->get_envelope($uidnext); + is($envl->{subject}, 'test Ævar', 'UTF-8 subject'); + is($envl->{sender}->[0]->{personalname}, 'Ævar Arnfjörð Bjarmason', + 'UTF-8 sender[0].personalname'); + SKIP: { + skip 'need compress for comparisons', 1 if !$can_compress; + ok($ic = $imap_client->new(%mic_opt), 'uncompressed logged in'); + ok($ic && $ic->compress, 'compress enabled'); + ok($ic->examine($mb), "EXAMINE $mb"); + my $raw = $ic->get_envelope($uidnext); + is_deeply($envl, $raw, 'raw and compressed match'); + } +} + $td->kill; $td->join; is($?, 0, 'no error in exited process') if !$ENV{TEST_KILL_IMAPD}; diff --git a/t/psgi_search.t b/t/psgi_search.t index 3da93eda718cd7335c2b1f747f2a7f16e2bf880e..8868f67ee29a4187049a14264ec99122d5048cc2 100644 --- a/t/psgi_search.t +++ b/t/psgi_search.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2017-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; @@ -103,6 +103,11 @@ like($res->content, qr/\b1 partial match found\b/); like($res->content, $mid_re, 'found mid in response'); chop($digits); } + $res = $cb->(GET("/test/$mid/")); + $html = $res->content; + like($html, qr/\bFrom: Ævar /, + "displayed Ævar's name properly in permalink From:"); + unlike($html, qr/Ã/, 'no raw octets in permalink HTML'); $res = $cb->(GET('/test/')); $html = $res->content;