Sergey Matveev's repositories - public-inbox.git/commitdiff
www: viewdiff: fix UTF-8 names inside mbox attachments
author: Eric Wong <e@80x24.org>
Mon, 12 Sep 2022 22:54:04 +0000 (22:54 +0000)
committer: Eric Wong <e@80x24.org>
Mon, 12 Sep 2022 22:56:03 +0000 (22:56 +0000)
This avoids `Wide character in print' warnings and ensures the
UTF-8 characters in `Signed-off-by' trailers are properly rendered
in HTML even when attempting to decode and display
application/octet-stream mbox attachments as HTML.

Linkification and reconstruction for coderepos is probably
still broken, but that is a much bigger task to fix, I think.

Fixes: ab9c03ff4aa369b3 ("www: use PerlIO::scalar (zfh) for buffering")
MANIFEST
lib/PublicInbox/ViewDiff.pm
t/data/attached-mbox-with-utf8.eml [new file with mode: 0644]
t/plack.t

index 8be912d0a8a907f8d621bb4c230a89f486acb12f..35382d2d160bfe5fa2b7ec391fd7d497288ea573 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -400,6 +400,7 @@ t/content_hash.t
 t/convert-compact.t
 t/data-gen/.gitignore
 t/data/0001.patch
+t/data/attached-mbox-with-utf8.eml
 t/data/binary.patch
 t/data/message_embed.eml
 t/dir_idle.t
index fba3d76c7b0cc10473261380c081301f93b5b119..9a7adb57770f5843892438644eea38067c24e483 100644 (file)
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -7,8 +7,7 @@
 # (or reconstruct) blobs.
 
 package PublicInbox::ViewDiff;
-use strict;
-use v5.10.1;
+use v5.12;
 use parent qw(Exporter);
 our @EXPORT_OK = qw(flush_diff uri_escape_path);
 use URI::Escape qw(uri_escape_utf8);
@@ -197,7 +196,8 @@ sub flush_diff ($$) {
                                $top[0] =~ $IS_OID) {
                        $dctx = diff_header(\$x, $ctx, \@top);
                } elsif ($dctx) {
-                       open(my $afh, '>>', \(my $after='')) or die "open: $!";
+                       open(my $afh, '>>:utf8', \(my $after='')) or
+                               die "open: $!";
 
                        # Quiet "Complex regular subexpression recursion limit"
                        # warning.  Perl will truncate matches upon hitting
@@ -213,7 +213,7 @@ sub flush_diff ($$) {
                                        (?:(?:^-[^\n]*\n)+)|
                                        (?:^@@ [^\n]+\n))/xsm, $x)) {
                                if (!defined($dctx)) {
-                                       print $afh $s;
+                                       print $afh $s;
                                } elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) {
                                        print $zfh qq(<span\nclass="hunk">),
                                                diff_hunk($dctx, $1, $2),
@@ -234,7 +234,10 @@ sub flush_diff ($$) {
                                        print $zfh $lnk->to_html($s);
                                }
                        }
-                       diff_before_or_after($ctx, \$after) if !$dctx;
+                       if (!$dctx) {
+                               utf8::decode($after);
+                               diff_before_or_after($ctx, \$after);
+                       }
                } else {
                        diff_before_or_after($ctx, \$x);
                }
diff --git a/t/data/attached-mbox-with-utf8.eml b/t/data/attached-mbox-with-utf8.eml
new file mode 100644 (file)
index 0000000..53dad83
--- /dev/null
+++ b/t/data/attached-mbox-with-utf8.eml
@@ -0,0 +1,45 @@
+Date: Mon, 24 Sep 2018 09:46:40 -0700 (PDT)
+Message-Id: <attached-mbox-with-utf8@example>
+To: test@example.com
+Subject: [PATCHES] attached mbox with UTF-8 patch
+From: attacher@example.com
+Mime-Version: 1.0
+Content-Type: Multipart/Mixed;
+ boundary="--Next_Part(Mon_Sep_24_09_46_40_2018_110)--"
+Content-Transfer-Encoding: 7bit
+
+----Next_Part(Mon_Sep_24_09_46_40_2018_110)--
+Content-Type: Text/Plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+
+hello world
+
+----Next_Part(Mon_Sep_24_09_46_40_2018_110)--
+Content-Type: Application/Octet-Stream
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="foo.mbox"
+
+RnJvbSAzNGRkMWQyNWQ3NmU0NjRjNTM0ZGI0MDllYTdlZDQyNWFiMDVjODI2IE1vbiBTZXAgMTcg
+MDA6MDA6MDAgMjAwMQpGcm9tOiA9P1VURi04P3E/Qmo9QzM9Qjhybj89IDxiam9ybkBleGFtcGxl
+LmNvbT4KRGF0ZTogVGh1LCAxMiBTZXAgMjAxOSAxMDo0MjowMCArMDIwMApNSU1FLVZlcnNpb246
+IDEuMApDb250ZW50LVR5cGU6IHRleHQvcGxhaW47IGNoYXJzZXQ9VVRGLTgKQ29udGVudC1UcmFu
+c2Zlci1FbmNvZGluZzogOGJpdAoKU2lnbmVkLW9mZi1ieTogQmrDuHJuIDxiam9ybkBleGFtcGxl
+LmNvbT4KU2lnbmVkLW9mZi1ieTogaiDFu2VuIDx6QGV4YW1wbGUuY29tPgotLS0KIGZvby5jIHwg
+MSArLQogMSBmaWxlIGNoYW5nZWQsIDEgaW5zZXJ0aW9ucygrKSwgMSBkZWxldGlvbnMoLSkKCmRp
+ZmYgLS1naXQgYS9mb28uYyBiL2Zvby5jCmluZGV4IDVjNDJjZjgxYTA4Yi4uODVmYmE2NGMzZmNm
+IDEwMDY0NAotLS0gYS9mb28uYworKysgYi9mb28uYwpAQCAtMjIxLDkgKzIyMSw5IEBAIGludCBo
+ZWxsbyh2b2lkKQogCQlnb3RvIHBoYWlsOwogCX0KIHNraXA6Ci0JaWYgKAlmb28gJiYKKwl1bmxl
+c3MgKGZvbykKIGJsYWgKIGJsYWgKIGJsYWgKLS0gCkJqw7hybgoKRnJvbSAzNGRkMWQyNWQ3NmU0
+NjRjNTM0ZGI0MDllYTdlZDQyNWFiMDVjODI2IE1vbiBTZXAgMTcgMDA6MDA6MDAgMjAwMQpGcm9t
+OiA9P1VURi04P3E/Qmo9QzM9Qjhybj89IDxiam9ybkBleGFtcGxlLmNvbT4KRGF0ZTogVGh1LCAx
+MiBTZXAgMjAxOSAxMDo0MjowMCArMDIwMApNSU1FLVZlcnNpb246IDEuMApDb250ZW50LVR5cGU6
+IHRleHQvcGxhaW47IGNoYXJzZXQ9VVRGLTgKQ29udGVudC1UcmFuc2Zlci1FbmNvZGluZzogOGJp
+dAoKU2lnbmVkLW9mZi1ieTogQmrDuHJuIDxiam9ybkBleGFtcGxlLmNvbT4KU2lnbmVkLW9mZi1i
+eTogaiDFu2VuIDx6QGV4YW1wbGUuY29tPgotLS0KIGZvby5jIHwgMSArLQogMSBmaWxlIGNoYW5n
+ZWQsIDEgaW5zZXJ0aW9ucygrKSwgMSBkZWxldGlvbnMoLSkKCmRpZmYgLS1naXQgYS9mb28uYyBi
+L2Zvby5jCmluZGV4IDVjNDJjZjgxYTA4Yi4uODVmYmE2NGMzZmNmIDEwMDY0NAotLS0gYS9mb28u
+YworKysgYi9mb28uYwpAQCAtMjIxLDkgKzIyMSw5IEBAIGludCBoZWxsbyh2b2lkKQogCQlnb3Rv
+IHBoYWlsOwogCX0KIHNraXA6Ci0JaWYgKAlmb28gJiYKKwl1bmxlc3MgKGZvbykKIGJsYWgKIGJs
+YWgKIGJsYWgKLS0gCkJqw7hybgo=
+
+----Next_Part(Mon_Sep_24_09_46_40_2018_110)----
index 1cee286d63a61e157e53dc88c8ba0e9dec9564e4..7f80f488aef9131582043eec14de8898ff5fe179 100644 (file)
--- a/t/plack.t
+++ b/t/plack.t
@@ -13,7 +13,7 @@ my ($tmpdir, $for_destroy) = tmpdir();
 my $pfx = 'http://example.com/test';
 my $eml = eml_load('t/iso-2202-jp.eml');
 # ensure successful message deliveries
-my $ibx = create_inbox('test-1', sub {
+my $ibx = create_inbox('u8-2', sub {
        my ($im, $ibx) = @_;
        my $addr = $ibx->{-primary_address};
        $im->add($eml) or xbail '->add';
@@ -39,6 +39,8 @@ EOF
        # multipart with attached patch + filename
        $im->add(eml_load('t/plack-attached-patch.eml')) or BAIL_OUT '->add';
 
+       $im->add(eml_load('t/data/attached-mbox-with-utf8.eml')) or xbail 'add';
+
        # multipart collapsed to single quoted-printable text/plain
        $im->add(eml_load('t/plack-qp.eml')) or BAIL_OUT '->add';
        my $crlf = <<EOF;
@@ -181,6 +183,9 @@ my $c1 = sub {
        $res = $cb->(GET($pfx . '/qp@example.com/'));
        like($res->content, qr/\bhi = bye\b/, "HTML output decoded QP");
 
+       $res = $cb->(GET($pfx . '/attached-mbox-with-utf8@example/'));
+       like($res->content, qr/: Bj&#248;rn /, 'UTF-8 in mbox #1');
+       like($res->content, qr/: j &#379;en/, 'UTF-8 in mbox #2');
 
        $res = $cb->(GET($pfx . '/blah@example.com/raw'));
        is(200, $res->code, 'success response received for /*/raw');
@@ -246,7 +251,6 @@ my $c1 = sub {
                        'redirect from x40 MIDs works');
        }
 
-
        # dumb HTTP clone/fetch support
        $path = '/test/info/refs';
        my $req = HTTP::Request->new('GET' => $path);