use Email::MIME;
use Email::Address;
use URI::Escape qw/uri_escape/;
-use Encode qw/encode decode/;
+use Encode qw/find_encoding/;
use Encode::MIME::Header;
use CGI qw(escapeHTML);
use POSIX qw(strftime);
use constant DATEFMT => '%Y-%m-%dT%H:%M:%SZ';
use PublicInbox::View;
use Mail::Thread;
+my $enc_utf8 = find_encoding('utf8');
+my $enc_ascii = find_encoding('us-ascii');
+my $enc_mime = find_encoding('MIME-Header');
# FIXME: workaround https://rt.cpan.org/Public/Bug/Display.html?id=22817
my $top = $args->{top}; # bool
local $ENV{GIT_DIR} = $args->{git_dir};
my $feed_opts = get_feedopts($args);
- my $title = escapeHTML($feed_opts->{description} || "");
+ my $title = xs_html($feed_opts->{description} || "");
my @messages;
each_recent_blob($max, sub {
my $str = `git cat-file blob $_[0]`;
my ($simple, $name) = @_;
my $val = $simple->header($name);
return "" unless defined $val;
- $val =~ tr/\t\r\n / /s;
- encode('utf8', decode('MIME-Header', $val));
+ $val =~ tr/\t\n / /s;
+ $val =~ tr/\r//d;
+ $enc_utf8->encode($enc_mime->decode($val));
}
sub feed_date {
my @from = Email::Address->parse($from);
$from = $from[0]->name;
(defined($from) && length($from)) or $from = $from[0]->address;
- $from = escapeHTML($from);
- $subj = escapeHTML($subj);
- $args->[0] .= "<a href=\"$url.html\">`-> $subj</a> $from\n";
+ $from = xs_html($from);
+ $subj = xs_html($subj);
+ $args->[0] .= "<a href=\"$url.html\">$subj</a> $from\n";
} else {
$args->[0] .= "[ Message not available ]\n";
}
dump_html_line($self->next, $level, $args) if $self->next;
}
+# Turn a UTF-8 octet string into HTML-safe, pure-ASCII text: decode the
+# octets to characters, escape HTML metacharacters, then encode to
+# US-ASCII with anything unrepresentable emitted as a numeric character
+# reference (Encode::HTMLCREF).
+sub xs_html {
+	my $chars = $enc_utf8->decode($_[0]);
+	return $enc_ascii->encode(escapeHTML($chars), Encode::HTMLCREF);
+}
+
1;
use warnings;
use URI::Escape qw/uri_escape/;
use CGI qw/escapeHTML/;
-use Encode qw/decode encode/;
+use Encode qw/find_encoding/;
use Encode::MIME::Header;
+use Email::MIME::ContentType qw/parse_content_type/;
+
+my $enc_utf8 = find_encoding('utf8');
+my $enc_ascii = find_encoding('us-ascii');
+my $enc_mime = find_encoding('MIME-Header');
# public functions:
sub as_html {
# only private functions below.
+# Choose the Encode object for a Content-Type header value.
+# Returns the encoding named by the header's charset attribute when
+# Encode recognises it; in every other case (undefined header, no
+# parse result, no charset, unknown charset) returns the UTF-8 encoder.
+sub enc_for {
+	my ($ct) = @_;
+	return $enc_utf8 unless defined $ct;
+
+	my $parsed = parse_content_type($ct) or return $enc_utf8;
+	my $charset = $parsed->{attributes}->{charset} or return $enc_utf8;
+
+	# find_encoding() yields a false value for names it does not know
+	return find_encoding($charset) || $enc_utf8;
+}
+
sub multipart_text_as_html {
my ($mime, $full_pfx) = @_;
my $rv = "";
my $part_nr = 0;
+ my $enc_msg = enc_for($mime->header("Content-Type"));
# scan through all parts, looking for displayable text
$mime->walk_parts(sub {
my ($part) = @_;
return if $part->subparts; # walk_parts already recurses
-
- my $fn = $part->filename;
+ my $enc = enc_for($part->content_type) || $enc_msg || $enc_utf8;
if ($part_nr > 0) {
+ my $fn = $part->filename;
defined($fn) or $fn = "part #" . ($part_nr + 1);
- $rv .= add_filename_line($fn);
+ $rv .= add_filename_line($enc->decode($fn));
}
if (defined $full_pfx) {
- $rv .= add_text_body_short($part, $part_nr,
+ $rv .= add_text_body_short($enc, $part, $part_nr,
$full_pfx);
} else {
- $rv .= add_text_body_full($part, $part_nr);
+ $rv .= add_text_body_full($enc, $part, $part_nr);
}
$rv .= "\n" unless $rv =~ /\n\z/s;
++$part_nr;
$len -= length($fn);
$pad x= ($len/2) if ($len > 0);
- "$pad " . escapeHTML($fn) . " $pad\n";
+ "$pad " . ascii_html($fn) . " $pad\n";
}
sub add_text_body_short {
- my ($part, $part_nr, $full_pfx) = @_;
+ my ($enc, $part, $part_nr, $full_pfx) = @_;
my $n = 0;
- my $s = escapeHTML($part->body);
+ my $s = ascii_html($enc->decode($part->body));
$s =~ s!^((?:(?:>[^\n]+)\n)+)!
my $cur = $1;
my @lines = split(/\n/, $cur);
}
sub add_text_body_full {
- my ($part, $part_nr) = @_;
+ my ($enc, $part, $part_nr) = @_;
my $n = 0;
- my $s = escapeHTML($part->body);
+ my $s = ascii_html($enc->decode($part->body));
$s =~ s!^((?:(?:>[^\n]+)\n)+)!
my $cur = $1;
my @lines = split(/\n/, $cur);
sub trim_message_id {
my ($mid) = @_;
-	$mid =~ s/\A<//;
-	$mid =~ s/>\z//;
-	my $html = escapeHTML($mid);
-	my $href = escapeHTML(uri_escape($mid));
+	# Strip the surrounding angle brackets (and stray whitespace) from a
+	# Message-ID and return two HTML-safe ASCII forms of it:
+	# ($html, $href) -- display text and a URI-escaped link target.
+	# The header value may contain MIME encoded-words, so decode it to
+	# characters before trimming.
+	$mid = $enc_mime->decode($mid);
+	$mid =~ s/\A\s*<//;
+	$mid =~ s/>\s*\z//;
+	my $html = ascii_html($mid);
+	# $mid is a character string after decoding: uri_escape() croaks on
+	# code points above 255 and percent-encodes 128..255 as Latin-1.
+	# uri_escape_utf8() encodes to UTF-8 first and gives identical
+	# output for pure-ASCII Message-IDs.
+	my $href = ascii_html(URI::Escape::uri_escape_utf8($mid));
($html, $href);
}
+# HTML-escape a character string and return it as US-ASCII octets;
+# characters outside ASCII become numeric character references
+# (Encode::HTMLCREF).
+sub ascii_html {
+	my ($text) = @_;
+	return $enc_ascii->encode(escapeHTML($text), Encode::HTMLCREF);
+}
+
sub headers_to_html_header {
my ($simple) = @_;
foreach my $h (qw(From To Cc Subject Date)) {
my $v = $simple->header($h);
defined $v or next;
- $v = decode("MIME-Header", $v);
- $v = encode("utf8", $v);
- $v = escapeHTML($v);
- $v =~ tr/\n/ /;
+ $v =~ tr/\n/ /s;
+ $v =~ tr/\r//d;
+ $v = ascii_html($enc_mime->decode($v));
$rv .= "$h: $v\n";
if ($h eq "From" || $h eq "Subject") {