From 483a67ca0613a75bb80ea4c1201cb2d5f2cf063d Mon Sep 17 00:00:00 2001
From: Eric Wong <e@80x24.org>
Date: Thu, 24 Apr 2014 00:21:21 +0000
Subject: [PATCH] html: refactor header value handling to be OO

This helps us keep track of escaping which needs to be done
for various levels.
---
 MANIFEST                |  1 +
 lib/PublicInbox/Feed.pm | 29 +++++++++++-----------
 lib/PublicInbox/Hval.pm | 53 +++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/View.pm | 24 +++++++------------
 4 files changed, 77 insertions(+), 30 deletions(-)
 create mode 100644 lib/PublicInbox/Hval.pm

diff --git a/MANIFEST b/MANIFEST
index 893cf3fd..b212c76b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -10,6 +10,7 @@ lib/PublicInbox/MDA.pm
 lib/PublicInbox/Config.pm
 lib/PublicInbox/Feed.pm
 lib/PublicInbox/Filter.pm
+lib/PublicInbox/Hval.pm
 lib/PublicInbox/View.pm
 public-inbox-mda
 sa_config/Makefile
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 93ee80bb..abfc0a9a 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -4,11 +4,11 @@ package PublicInbox::Feed;
 use strict;
 use warnings;
 use Email::Address;
-use URI::Escape qw/uri_escape/;
 use Encode qw/find_encoding/;
 use Encode::MIME::Header;
 use CGI qw(escapeHTML);
 use Date::Parse qw(strptime str2time);
+use PublicInbox::Hval;
 eval { require Git }; # this is GPLv2+, so we are OK to use it
 use constant {
 	DATEFMT => '%Y-%m-%dT%H:%M:%SZ',
@@ -262,10 +262,9 @@ sub add_to_feed {
 	my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
 	my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/';
 
-	my $mid = utf8_header($mime, "Message-ID") or return 0;
-	# FIXME: refactor
-	my (undef, $href) = PublicInbox::View::trim_message_id($mid);
-
+	my $mid = $mime->header('Message-ID');
+	$mid = PublicInbox::Hval->new_msgid($mid);
+	my $href = $mid->as_href;
 	my $content = PublicInbox::View->as_feed_entry($mime,
 							"$fullurl$href.html");
 	defined($content) or return 0;
@@ -273,7 +272,8 @@ sub add_to_feed {
 	my $subject = utf8_header($mime, "Subject") || "";
 	length($subject) or return 0;
 
-	my $from = utf8_header($mime, "From") or return 0;
+	my $from = $mime->header('From') or return 0;
+
 
 	my @from = Email::Address->parse($from);
 	my $name = $from[0]->name;
@@ -281,9 +281,10 @@ sub add_to_feed {
 	my $email = $from[0]->address;
 	defined $email or $email = "";
 
-	my $date = utf8_header($mime, "Date");
+	my $date = $mime->header('Date');
 	$date or return 0;
-	$date = feed_date($date) or return 0;
+	$date = PublicInbox::Hval->new_oneline($date);
+	$date = feed_date($date->as_utf8) or return 0;
 	$feed->add_entry(
 		author => { name => $name, email => $email },
 		title => $subject,
@@ -300,17 +301,17 @@ sub dump_html_line {
 	if ($self->message) {
 		$args->[0] .= (' ' x $level);
 		my $simple = $self->message;
-		my $subj = utf8_header($simple, "Subject");
-		my $mid = utf8_header($simple, "Message-ID");
-		$mid =~ s/\A<//;
-		$mid =~ s/>\z//;
-		my $url = $args->[1] . xs_html(uri_escape($mid));
+		my $subj = $simple->header('Subject');
+		my $mid = $simple->header('Message-ID');
+		$mid = PublicInbox::Hval->new_msgid($mid);
+		my $url = $args->[1] . $mid->as_href;
 		my $from = utf8_header($simple, "From");
 		my @from = Email::Address->parse($from);
 		$from = $from[0]->name;
 		(defined($from) && length($from)) or $from = $from[0]->address;
 		$from = xs_html($from);
-		$subj = xs_html($subj);
+		$subj = PublicInbox::Hval->new_oneline($subj);
+		$subj = $subj->as_html;
 		$args->[0] .= "<a href=\"$url.html\">$subj</a> $from\n";
 	}
 	dump_html_line($self->child, $level+1, $args) if $self->child;
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
new file mode 100644
index 00000000..26a2d0bb
--- /dev/null
+++ b/lib/PublicInbox/Hval.pm
@@ -0,0 +1,53 @@
+# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# represents a header value in various forms
+package PublicInbox::Hval;
+use strict;
+use warnings;
+use fields qw(raw -as_utf8);
+use Encode qw(find_encoding);
+use CGI qw(escapeHTML);
+use URI::Escape qw(uri_escape);
+
+my $enc_utf8 = find_encoding('utf8'); # used by as_utf8
+my $enc_ascii = find_encoding('us-ascii'); # used by ascii_html
+my $enc_mime = find_encoding('MIME-Header'); # NOTE(review): unused here
+
+# constructor: keeps the header value with surrounding whitespace removed
+sub new {
+	my ($class, $raw) = @_;
+
+	# leading/trailing whitespace never matters to us, drop both ends
+	s/\A\s*//, s/\s*\z// for $raw;
+	my $hval = fields::new($class);
+	$hval->{raw} = $raw;
+	return $hval;
+}
+
+sub new_msgid { # for Message-ID-style values: drops the enclosing <>
+	my ($class, $raw) = @_;
+	$raw =~ s/\A\s*<//; # whitespace may pad the brackets; strip it
+	$raw =~ s/>\s*\z//; # here or the anchors never reach "<" / ">"
+	$class->new($raw);
+}
+
+sub new_oneline { # flatten a possibly folded/multi-line value
+	my ($class, $raw) = @_;
+	$raw = '' unless defined $raw; # a missing header acts as empty
+	$raw =~ tr/\r//d; # kill CR first so "\r" cannot split a run below
+	$raw =~ tr/\t\n / /s; # squeeze spaces
+	$class->new($raw);
+}
+
+sub as_utf8 { # raw value run through the utf8 encoder, cached in the object
+	my $hval = $_[0];
+	return $hval->{-as_utf8} ||= $enc_utf8->encode($hval->{raw});
+}
+
+sub ascii_html { $enc_ascii->encode(escapeHTML($_[0]), Encode::HTMLCREF) }
+# HTML-escape, then encode as ASCII; non-ASCII becomes &#NNN; (HTMLCREF)
+sub as_html { ascii_html($_[0]->as_utf8) }
+sub as_href { ascii_html(uri_escape($_[0]->as_utf8)) }
+
+1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 355d346a..ecd49156 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -3,6 +3,7 @@
 package PublicInbox::View;
 use strict;
 use warnings;
+use PublicInbox::Hval;
 use URI::Escape qw/uri_escape/;
 use CGI qw/escapeHTML/;
 use Encode qw/find_encoding/;
@@ -135,16 +136,6 @@ sub add_text_body_full {
 	$s;
 }
 
-sub trim_message_id {
-	my ($mid) = @_;
-	$mid =~ s/\A\s*<//;
-	$mid =~ s/>\s*\z//;
-	my $html = ascii_html($mid);
-	my $href = ascii_html(uri_escape($mid));
-
-	($html, $href);
-}
-
 sub ascii_html {
 	$enc_ascii->encode(escapeHTML($_[0]), Encode::HTMLCREF);
 }
@@ -178,17 +169,18 @@ sub headers_to_html_header {
 
 	my $mid = $simple->header('Message-ID');
 	if (defined $mid) {
-		my ($html, $href) = trim_message_id($mid);
-		$rv .= "Message-ID: &lt;$html&gt; ";
-		unless ($full_pfx) {
-			$href = "../m/$href";
-		}
+		$mid = PublicInbox::Hval->new_msgid($mid);
+		$rv .= 'Message-ID: &lt;' . $mid->as_html . '&gt; ';
+		my $href = $mid->as_href;
+		$href = "../m/$href" unless $full_pfx;
 		$rv .= "(<a href=\"$href.txt\">original</a>)\n";
 	}
 
 	my $irp = $simple->header('In-Reply-To');
 	if (defined $irp) {
-		my ($html, $href) = trim_message_id($irp);
+		$irp = PublicInbox::Hval->new_msgid($irp);
+		my $html = $irp->as_html;
+		my $href = $irp->as_href;
 		$rv .= "In-Reply-To: &lt;";
 		$rv .= "<a href=\"$href.html\">$html</a>&gt;\n";
 	}
-- 
2.50.0