From b6974dbc81665427413020414a668ddb742e68f9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 14 Jun 2016 06:54:57 +0000 Subject: [PATCH] nntp: do not double-encode UTF-8 body Or whatever the appropriate Perl terminology is... And we will need to do something appropriate for other encodings, too. I still barely understand Perl Unicode despite attempting to understand the docs over the years. --- lib/PublicInbox/NNTP.pm | 17 ++++++++++++----- t/nntpd.t | 2 ++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index e4e3de4a..e8683210 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -512,6 +512,12 @@ sub set_art { $self->{article} = $art if defined $art && $art =~ /\A\d+\z/; } +sub _header ($) { + my $hdr = $_[0]->header_obj->as_string; + utf8::encode($hdr); + $hdr +} + sub cmd_article ($;$) { my ($self, $art) = @_; my $r = art_lookup($self, $art, 1); @@ -519,7 +525,7 @@ sub cmd_article ($;$) { my ($n, $mid, $s) = @$r; set_art($self, $art); more($self, "220 $n <$mid> article retrieved - head and body follow"); - do_more($self, $s->header_obj->as_string); + do_more($self, _header($s)); do_more($self, "\r\n"); simple_body_write($self, $s); } @@ -531,7 +537,7 @@ sub cmd_head ($;$) { my ($n, $mid, $s) = @$r; set_art($self, $art); more($self, "221 $n <$mid> article retrieved - head follows"); - do_more($self, $s->header_obj->as_string); + do_more($self, _header($s)); '.' } @@ -738,6 +744,7 @@ sub hdr_searchmsg ($$$$) { foreach my $s (@$msgs) { $tmp .= $s->num . ' ' . $s->$field . "\r\n"; } + utf8::encode($tmp); do_more($self, $tmp); # -1 to adjust for implicit increment in long_response $$i = $nr ? $$i + $nr - 1 : long_response_limit; @@ -826,7 +833,7 @@ sub over_line ($$) { my ($num, $smsg) = @_; # n.b. 
field access and procedural calls can be # 10%-15% faster than OO method calls: - join("\t", $num, + my $s = join("\t", $num, $smsg->{subject}, $smsg->{from}, PublicInbox::SearchMsg::date($smsg), @@ -834,6 +841,8 @@ sub over_line ($$) { $smsg->{references}, PublicInbox::SearchMsg::bytes($smsg), PublicInbox::SearchMsg::lines($smsg)); + utf8::encode($s); + $s } sub cmd_over ($;$) { @@ -896,7 +905,6 @@ sub cmd_xpath ($$) { sub res ($$) { my ($self, $line) = @_; - utf8::encode($line); do_write($self, $line . "\r\n"); } @@ -931,7 +939,6 @@ use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; sub do_more ($$) { my ($self, $data) = @_; - utf8::encode($data); if (MSG_MORE && !$self->{write_buf_size}) { my $n = send($self->{sock}, $data, MSG_MORE); if (defined $n) { diff --git a/t/nntpd.t b/t/nntpd.t index 5f4ba57b..5875b737 100644 --- a/t/nntpd.t +++ b/t/nntpd.t @@ -175,6 +175,8 @@ EOF is_deeply($n->head(1), $n->head(''), 'HEAD OK'); is_deeply($n->body(1), $n->body(''), 'BODY OK'); + is($n->body(1)->[0], "This is a test message for El\xc3\xa9anor\n", + 'body really matches'); my $art = $n->article(1); is(ref($art), 'ARRAY', 'got array for ARTICLE'); is_deeply($art, $n->article(''), 'ARTICLE OK'); -- 2.44.0