Sergey Matveev's repositories - public-inbox.git/commitdiff
msgiter: do not assume UTF-8 if Email::MIME->body_str succeeds
author Eric Wong <e@80x24.org>
Thu, 31 Oct 2019 00:33:04 +0000 (00:33 +0000)
committer Eric Wong <e@80x24.org>
Thu, 31 Oct 2019 00:33:50 +0000 (00:33 +0000)
ISO-2022-JP and other non-UTF-8 messages need to be displayed
correctly.

Fixes: 7d82a8bc04ce ('handle "multipart/mixed" messages which are not multipart')
MANIFEST
lib/PublicInbox/MsgIter.pm
t/iso-2202-jp.mbox [new file with mode: 0644]
t/msg_iter.t

index d1b6749a42f4e7adf8a1605ce98619de408507b5..dfabd7f20385545faf2bf4f6b4eb6ca9a35dc9af 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -231,6 +231,7 @@ t/inbox.t
 t/indexlevels-mirror-v1.t
 t/indexlevels-mirror.t
 t/init.t
+t/iso-2202-jp.mbox
 t/linkify.t
 t/main-bin/spamc
 t/mda.t
index ce08ff846ee95dd281bdb3f3aee6312e665cde8e..f11ba223c93362a985285555e4113feab87684f1 100644 (file)
@@ -45,7 +45,8 @@ sub msg_part_text ($$) {
        # times when it should not have been:
        #   <87llgalspt.fsf@free.fr>
        #   <200308111450.h7BEoOu20077@mail.osdl.org>
-       if ($ct =~ m!\btext/plain\b!i || $ct =~ m!\bmultipart/mixed\b!i) {
+       if ($err && ($ct =~ m!\btext/plain\b!i ||
+                       $ct =~ m!\bmultipart/mixed\b!i)) {
                # Try to assume UTF-8 because Alpine seems to
                # do wacky things and set charset=X-UNKNOWN
                $part->charset_set('UTF-8');
diff --git a/t/iso-2202-jp.mbox b/t/iso-2202-jp.mbox
new file mode 100644 (file)
index 0000000..1a8e197
--- /dev/null
@@ -0,0 +1,10 @@
+From historical@ruby-dev Thu Jan  1 00:00:00 1970
+Message-Id: <199707281508.AAA24167@hoyogw.example>
+Date: Tue, 29 Jul 97 00:08:29 +0900
+From: matz@example.com
+Subject: [ruby-dev:4]
+To: ruby-dev@example
+Mime-Version: 1.0
+Content-Type: text/plain; charset=ISO-2022-JP
+
+|\e$B$1$$$8$e!w:#$O%U%j!<\e(B(^^;;;\e$B$G$9\e(B.
index f6fd3bb025c9d5d247b4500527458b1386136286..f9b586f183d96f7cc54ee605d418ee0788c705b0 100644 (file)
@@ -40,5 +40,23 @@ use_ok('PublicInbox::MsgIter');
                'nested part shows up properly');
 }
 
+{
+       my $f = 't/iso-2202-jp.mbox';
+       my $mime = Email::MIME->new(do {
+               open my $fh, '<', $f or die "open($f): $!";
+               local $/;
+               <$fh>;
+       });
+       my $raw = '';
+       msg_iter($mime, sub {
+               my ($part, $level, @ex) = @{$_[0]};
+               my ($s, $err) = msg_part_text($part, 'text/plain');
+               ok(!$err, 'no error');
+               $raw .= $s;
+       });
+       ok(length($raw) > 0, 'got non-empty message');
+       is(index($raw, '$$$'), -1, 'no unescaped $$$');
+}
+
 done_testing();
 1;