lib/PublicInbox/Eml.pm | 8 +++++---
lib/PublicInbox/IMAP.pm | 2 ++
lib/PublicInbox/Smsg.pm | 3 ---
t/imapd.t | 28 ++++++++++++++++++++++++++++
t/psgi_search.t | 7 ++++++-
diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index 485f637a3e7b41a9117cb37a9acfd8e7649dadfa..8b999e1a88ef8d11b78e7c5bd0c022dbd7d0f7be 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors
+# Copyright (C) all contributors
# License: AGPL-3.0+
#
# Lazy MIME parser, it still slurps the full message but keeps short
@@ -144,6 +144,7 @@ sub header_raw {
my $re = re_memo($_[1]);
my @v = (${ $_[0]->{hdr} } =~ /$re/g);
for (@v) {
+ utf8::decode($_); # SMTPUTF8
# for compatibility w/ Email::Simple::Header,
s/\s+\z//s;
s/\A\s+//s;
@@ -359,14 +360,15 @@ return $$hdr =~ s!$re!!g if !@vals;
$pfx .= ': ';
my $len = 78 - length($pfx);
@vals = map {;
+ utf8::encode(my $v = $_); # to bytes, support SMTPUTF8
# folding differs from Email::Simple::Header,
# we favor tabs for visibility (and space savings :P)
if (length($_) >= $len && (/\n[^ \t]/s || !/\n/s)) {
local $Text::Wrap::columns = $len;
local $Text::Wrap::huge = 'overflow';
- $pfx . wrap('', "\t", $_) . $self->{crlf};
+ $pfx . wrap('', "\t", $v) . $self->{crlf};
} else {
- $pfx . $_ . $self->{crlf};
+ $pfx . $v . $self->{crlf};
}
} @vals;
$$hdr =~ s!$re!shift(@vals) // ''!ge; # replace current headers, first
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index 1f65aa650e2954e2a43b8549fabf33c829c00f41..37317948490234f0c9ce2dc75f5bacc4541501ea 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -426,8 +426,10 @@ my ($v) = @_;
if (!defined($v)) {
'NIL';
} elsif ($v =~ /[{"\r\n%*\\\[]/) { # literal string
+ utf8::encode($v);
'{' . length($v) . "}\r\n" . $v;
} else { # quoted string
+ utf8::encode($v);
qq{"$v"}
}
}
diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm
index 2026c7d900f65c057ba4a1d8a4bea6c0c9ca400e..b132381b4ab2c3a93fd78b1fe042d1710a2c3fe8 100644
--- a/lib/PublicInbox/Smsg.pm
+++ b/lib/PublicInbox/Smsg.pm
@@ -99,9 +99,6 @@ # MIME decoding can create NULs, replace them with spaces
# to protect git and NNTP clients
$val =~ tr/\0\t\n/ /;
- # rare: in case headers have wide chars (not RFC2047-encoded)
- utf8::decode($val);
-
# lower-case fields for read-only stuff
$self->{lc($f)} = $val;
diff --git a/t/imapd.t b/t/imapd.t
index 3c74aefd4beeeb806697e55dc503a720aecff5ef..cbd6c1b9961dec4c9c58484b108a3174c642bcb0 100644
--- a/t/imapd.t
+++ b/t/imapd.t
@@ -534,6 +534,34 @@ 'no backtraces from errors');
}
}
+{
+ ok(my $ic = $imap_client->new(%mic_opt), 'logged in');
+ my $mb = "$ibx[0]->{newsgroup}.$first_range";
+ ok($ic->examine($mb), "EXAMINE $mb");
+ my $uidnext = $ic->uidnext($mb); # we'll fetch BODYSTRUCTURE on this
+ my $im = $ibx[0]->importer(0);
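+ $im->add(PublicInbox::Eml->new(<<EOF)) or BAIL_OUT;
+Subject: test Ævar
+Message-ID: <smtputf8-delivered-mess\@age>
+From: Ævar Arnfjörð Bjarmason <avarab\@example>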
+To: git\@vger.kernel.org
+
+EOF
+ $im->done;
+ my $envl = $ic->get_envelope($uidnext);
+ is($envl->{subject}, 'test Ævar', 'UTF-8 subject');
+ is($envl->{sender}->[0]->{personalname}, 'Ævar Arnfjörð Bjarmason',
+ 'UTF-8 sender[0].personalname');
+ SKIP: {
+ skip 'need compress for comparisons', 1 if !$can_compress;
+ ok($ic = $imap_client->new(%mic_opt), 'uncompressed logged in');
+ ok($ic && $ic->compress, 'compress enabled');
+ ok($ic->examine($mb), "EXAMINE $mb");
+ my $raw = $ic->get_envelope($uidnext);
+ is_deeply($envl, $raw, 'raw and compressed match');
+ }
+}
+
$td->kill;
$td->join;
is($?, 0, 'no error in exited process') if !$ENV{TEST_KILL_IMAPD};
diff --git a/t/psgi_search.t b/t/psgi_search.t
index 3da93eda718cd7335c2b1f747f2a7f16e2bf880e..8868f67ee29a4187049a14264ec99122d5048cc2 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -1,5 +1,5 @@
#!perl -w
-# Copyright (C) 2017-2021 all contributors
+# Copyright (C) all contributors
# License: AGPL-3.0+
use strict;
use v5.10.1;
@@ -103,6 +103,11 @@ like($res->content, qr/\b1 partial match found\b/);
like($res->content, $mid_re, 'found mid in response');
chop($digits);
}
+ $res = $cb->(GET("/test/$mid/"));
+ $html = $res->content;
+ like($html, qr/\bFrom: Ævar /,
+ "displayed Ævar's name properly in permalink From:");
+ unlike($html, qr/Ã/, 'no raw octets in permalink HTML');
$res = $cb->(GET('/test/'));
$html = $res->content;