X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FView.pm;h=4d6f44e0bdc8dca7b1b9da9fa88bb3349b6e74da;hb=52a02a813a46940530183ede4d4cc7028290cd8f;hp=d897aeecc3ed3abf6ba2dc71bb9540cbf3ec0bdd;hpb=321fcd4881bc196521466c6698f7c4b4a4bb5360;p=public-inbox.git diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index d897aeec..4d6f44e0 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -17,6 +17,7 @@ use PublicInbox::Address; use PublicInbox::WwwStream; use PublicInbox::Reply; use PublicInbox::ViewDiff qw(flush_diff); +use PublicInbox::Eml; use POSIX qw(strftime); use Time::Local qw(timegm); use PublicInbox::Smsg qw(subject_normalized); @@ -56,7 +57,7 @@ sub msg_page { } else { $first = $ibx->msg_by_mid($mid) or return; } - my $mime = PublicInbox::MIME->new($first); + my $mime = PublicInbox::Eml->new($first); $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; my $hdr = $ctx->{hdr} = $mime->header_obj; $ctx->{obuf} = _msg_page_prepare_obuf($hdr, $ctx, 0); @@ -65,7 +66,7 @@ sub msg_page { $ctx->{mhref} = $next ? '../'.mid_href($smsg->{mid}).'/' : ''; multipart_text_as_html($mime, $ctx); $ctx->{-html_tip} = (${delete $ctx->{obuf}} .= '
'); - PublicInbox::WwwStream->response($ctx, 200, \&msg_page_i); + PublicInbox::WwwStream::response($ctx, 200, \&msg_page_i); } sub msg_page_more { # cold @@ -73,11 +74,10 @@ sub msg_page_more { # cold my $ibx = $ctx->{-inbox}; my $next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev); $ctx->{more} = [ $id, $prev, $next ] if $next; - $smsg = $ibx->smsg_mime($smsg) or return ''; + my $eml = $ibx->smsg_eml($smsg) or return ''; $ctx->{mhref} = '../' . mid_href($smsg->{mid}) . '/'; - my $mime = delete $smsg->{mime}; - $ctx->{obuf} = _msg_page_prepare_obuf($mime->header_obj, $ctx, $nr); - multipart_text_as_html($mime, $ctx); + $ctx->{obuf} = _msg_page_prepare_obuf($eml->header_obj, $ctx, $nr); + multipart_text_as_html($eml, $ctx); ${delete $ctx->{obuf}} .= '
'; } @@ -150,7 +150,7 @@ sub in_reply_to { sub fold_addresses ($) { return $_[0] if length($_[0]) <= COLS; # try to fold on commas after non-word chars before $lim chars, - # Try to get the "," preceeded by ">" or ")", but avoid folding + # Try to get the "," preceded by ">" or ")", but avoid folding # on the comma where somebody uses "Lastname, Firstname". # We also try to keep the last and penultimate addresses in # the list on the same line if possible, hence the extra \z @@ -180,14 +180,14 @@ sub nr_to_s ($$$) { # human-friendly format sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) } +# Displays the text of of the message for /$INBOX/$MSGID/[Tt]/ endpoint # this is already inside a
-sub index_entry {
-	my ($smsg, $ctx, $more) = @_;
-	my $subj = $smsg->subject;
-	my $mid_raw = $smsg->mid;
+sub eml_entry {
+	my ($ctx, $smsg, $eml, $more) = @_;
+	my $subj = delete $smsg->{subject};
+	my $mid_raw = $smsg->{mid};
 	my $id = id_compress($mid_raw, 1);
 	my $id_m = 'm'.$id;
-
 	my $root_anchor = $ctx->{root_anchor} || '';
 	my $irt;
 	my $obfs_ibx = $ctx->{-obfs_ibx};
@@ -200,12 +200,12 @@ sub index_entry {
 	$rv .= $subj . "\n";
 	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
 	my @tocc;
-	my $ds = $smsg->ds; # for v1 non-Xapian/SQLite users
-	# deleting {mime} is critical to memory use,
-	# the rest of the fields saves about 400K as we iterate across 1K msgs
-	my ($mime) = delete @$smsg{qw(mime ds ts blob subject)};
+	my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users
+
+	# Deleting these fields saves about 400K as we iterate across 1K msgs
+	delete @$smsg{qw(ts blob)};
 
-	my $hdr = $mime->header_obj;
+	my $hdr = $eml->header_obj;
 	my $from = _hdr_names_html($hdr, 'From');
 	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
 	$rv .= "From: $from @ ".fmt_ts($ds)." UTC";
@@ -243,7 +243,7 @@ sub index_entry {
 	# scan through all parts, looking for displayable text
 	$ctx->{mhref} = $mhref;
 	$ctx->{obuf} = \$rv;
-	msg_iter($mime, \&add_text_body, $ctx, 1);
+	$eml->each_part(\&add_text_body, $ctx, 1);
 	delete $ctx->{obuf};
 
 	# add the footer
@@ -297,11 +297,9 @@ sub _th_index_lite {
 	my $nr_c = scalar @$children;
 	my $nr_s = 0;
 	my $siblings;
-	if (my $smsg = $node->{smsg}) {
-		# delete saves about 200KB on a 1K message thread
-		if (my $refs = delete $smsg->{references}) {
-			($$irt) = ($refs =~ m/$MID_EXTRACT\z/o);
-		}
+	# delete saves about 200KB on a 1K message thread
+	if (my $refs = delete $node->{references}) {
+		($$irt) = ($refs =~ m/$MID_EXTRACT\z/o);
 	}
 	my $irt_map = $mapping->{$$irt} if defined $$irt;
 	if (defined $irt_map) {
@@ -310,12 +308,12 @@ sub _th_index_lite {
 		$rv .= $pad . $irt_map->[0];
 		if ($idx > 0) {
 			my $prev = $siblings->[$idx - 1];
-			my $pmid = $prev->{id};
+			my $pmid = $prev->{mid};
 			if ($idx > 2) {
 				my $s = ($idx - 1). ' preceding siblings ...';
 				$rv .= pad_link($pmid, $level, $s);
 			} elsif ($idx == 2) {
-				my $ppmid = $siblings->[0]->{id};
+				my $ppmid = $siblings->[0]->{mid};
 				$rv .= $pad . $mapping->{$ppmid}->[0];
 			}
 			$rv .= $pad . $mapping->{$pmid}->[0];
@@ -324,30 +322,30 @@ sub _th_index_lite {
 	my $s_s = nr_to_s($nr_s, 'sibling', 'siblings');
 	my $s_c = nr_to_s($nr_c, 'reply', 'replies');
 	$attr =~ s!\n\z!\n!s;
-	$attr =~ s! !!s; # no point in duplicating subject
+	$attr =~ s! (?:" )?!!s; # no point in dup subject
 	$attr =~ s!]+>([^<]+)!$1!s; # no point linking to self
 	$rv .= "@ $attr";
 	if ($nr_c) {
-		my $cmid = $children->[0]->{id};
+		my $cmid = $children->[0]->{mid};
 		$rv .= $pad . $mapping->{$cmid}->[0];
 		if ($nr_c > 2) {
 			my $s = ($nr_c - 1). ' more replies';
 			$rv .= pad_link($cmid, $level + 1, $s);
 		} elsif (my $cn = $children->[1]) {
-			$rv .= $pad . $mapping->{$cn->{id}}->[0];
+			$rv .= $pad . $mapping->{$cn->{mid}}->[0];
 		}
 	}
 
 	my $next = $siblings->[$idx+1] if $siblings && $idx >= 0;
 	if ($next) {
-		my $nmid = $next->{id};
+		my $nmid = $next->{mid};
 		$rv .= $pad . $mapping->{$nmid}->[0];
 		my $nnext = $nr_s - $idx;
 		if ($nnext > 2) {
 			my $s = ($nnext - 1).' subsequent siblings';
 			$rv .= pad_link($nmid, $level, $s);
 		} elsif (my $nn = $siblings->[$idx + 2]) {
-			$rv .= $pad . $mapping->{$nn->{id}}->[0];
+			$rv .= $pad . $mapping->{$nn->{mid}}->[0];
 		}
 	}
 	$rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n";
@@ -369,14 +367,14 @@ sub walk_thread ($$$) {
 
 sub pre_thread  { # walk_thread callback
 	my ($ctx, $level, $node, $idx) = @_;
-	$ctx->{mapping}->{$node->{id}} = [ '', $node, $idx, $level ];
+	$ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ];
 	skel_dump($ctx, $level, $node);
 }
 
-sub thread_index_entry {
-	my ($ctx, $level, $smsg) = @_;
+sub thread_eml_entry {
+	my ($ctx, $level, $smsg, $eml) = @_;
 	my ($beg, $end) = thread_adj_level($ctx, $level);
-	$beg . '
' . index_entry($smsg, $ctx, 0) . '
' . $end; + $beg . '
' . eml_entry($ctx, $smsg, $eml, 0) . '
' . $end; } sub stream_thread_i { # PublicInbox::WwwStream::getline callback @@ -388,8 +386,8 @@ sub stream_thread_i { # PublicInbox::WwwStream::getline callback my $node = shift @$q or next; my $cl = $level + 1; unshift @$q, map { ($cl, $_) } @{$node->{children}}; - if (my $smsg = $ctx->{-inbox}->smsg_mime($node->{smsg})) { - return thread_index_entry($ctx, $level, $smsg); + if (my $eml = $ctx->{-inbox}->smsg_eml($node)) { + return thread_eml_entry($ctx, $level, $node, $eml); } else { return ghost_index_entry($ctx, $level, $node); } @@ -401,21 +399,21 @@ sub stream_thread ($$) { my ($rootset, $ctx) = @_; my $ibx = $ctx->{-inbox}; my @q = map { (0, $_) } @$rootset; - my ($smsg, $level); + my ($smsg, $eml, $level); while (@q) { $level = shift @q; - my $node = shift @q or next; + $smsg = shift @q or next; my $cl = $level + 1; - unshift @q, map { ($cl, $_) } @{$node->{children}}; - $smsg = $ibx->smsg_mime($node->{smsg}) and last; + unshift @q, map { ($cl, $_) } @{$smsg->{children}}; + $eml = $ibx->smsg_eml($smsg) and last; } - return missing_thread($ctx) unless $smsg; + return missing_thread($ctx) unless $eml; $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; $ctx->{-title_html} = ascii_html($smsg->{subject}); - $ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg); + $ctx->{-html_tip} = thread_eml_entry($ctx, $level, $smsg, $eml); $ctx->{-queue} = \@q; - PublicInbox::WwwStream->response($ctx, 200, \&stream_thread_i); + PublicInbox::WwwStream::response($ctx, 200, \&stream_thread_i); } # /$INBOX/$MESSAGE_ID/t/ @@ -453,60 +451,65 @@ sub thread_html { return stream_thread($rootset, $ctx) unless $ctx->{flat}; # flat display: lazy load the full message from smsg - my $smsg; - while (my $m = shift @$msgs) { - $smsg = $ibx->smsg_mime($m) and last; + my ($smsg, $eml); + while ($smsg = shift @$msgs) { + $eml = $ibx->smsg_eml($smsg) and last; } return missing_thread($ctx) unless $smsg; $ctx->{-title_html} = ascii_html($smsg->{subject}); - $ctx->{-html_tip} = '
'.index_entry($smsg, $ctx, scalar @$msgs);
+	$ctx->{-html_tip} = '
'.eml_entry($ctx, $smsg, $eml, scalar @$msgs);
 	$ctx->{msgs} = $msgs;
-	PublicInbox::WwwStream->response($ctx, 200, \&thread_html_i);
+	PublicInbox::WwwStream::response($ctx, 200, \&thread_html_i);
 }
 
 sub thread_html_i { # PublicInbox::WwwStream::getline callback
 	my ($nr, $ctx) = @_;
 	my $msgs = $ctx->{msgs} or return;
 	while (my $smsg = shift @$msgs) {
-		$ctx->{-inbox}->smsg_mime($smsg) or next;
-		return index_entry($smsg, $ctx, scalar @$msgs);
+		my $eml = $ctx->{-inbox}->smsg_eml($smsg) or next;
+		return eml_entry($ctx, $smsg, $eml, scalar @$msgs);
 	}
 	my ($skel) = delete @$ctx{qw(skel msgs)};
 	$$skel;
 }
 
 sub multipart_text_as_html {
-	# ($mime, $ctx) = @_; # msg_iter will do "$_[0] = undef"
+	# ($mime, $ctx) = @_; # each_part may do "$_[0] = undef"
 
 	# scan through all parts, looking for displayable text
-	msg_iter($_[0], \&add_text_body, $_[1], 1);
+	$_[0]->each_part(\&add_text_body, $_[1], 1);
 }
 
-sub flush_quote {
-	my ($s, $l, $quot) = @_;
-
-	my $rv = $l->to_html($$quot);
-
-	# we use a  here to allow users to specify their own
-	# color for quoted text
-	$$quot = undef;
-	$$s .= qq() . $rv . ''
+sub submsg_hdr ($$) {
+	my ($ctx, $eml) = @_;
+	my $obfs_ibx = $ctx->{-obfs_ibx};
+	my $rv = $ctx->{obuf};
+	$$rv .= "\n";
+	for my $h (qw(From To Cc Subject Date Message-ID X-Alt-Message-ID)) {
+		my @v = $eml->header($h);
+		for my $v (@v) {
+			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+			$v = ascii_html($v);
+			$$rv .= "$h: $v\n";
+		}
+	}
 }
 
 sub attach_link ($$$$;$) {
 	my ($ctx, $ct, $p, $fn, $err) = @_;
-	my ($part, $depth, @idx) = @$p;
-	my $nl = $idx[-1] > 1 ? "\n" : '';
-	my $idx = join('.', @idx);
+	my ($part, $depth, $idx) = @$p;
+
+	# Eml iteration clobbers multipart ->{bdy}, so do not offer
+	# downloads for 0-byte multipart attachments
+	return unless $part->{bdy};
+
+	my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
 	my $size = bytes::length($part->body);
 
 	# hide attributes normally, unless we want to aid users in
 	# spotting MUA problems:
 	$ct =~ s/;.*// unless $err;
 	$ct = ascii_html($ct);
-	my $desc = $part->header('Content-Description');
-	$desc = $fn unless defined $desc;
-	$desc = '' unless defined $desc;
 	my $sfn;
 	if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) {
 		$sfn = $fn;
@@ -524,35 +527,53 @@ EOF
 	}
 	$$rv .= "[-- Attachment #$idx: ";
 	my $ts = "Type: $ct, Size: $size bytes";
+	my $desc = $part->header('Content-Description') // $fn // '';
 	$desc = ascii_html($desc);
 	$$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
 	$$rv .= "\n";
+
+	submsg_hdr($ctx, $part) if $part->{is_submsg};
+
 	undef;
 }
 
-sub add_text_body { # callback for msg_iter
+sub add_text_body { # callback for each_part
 	my ($p, $ctx) = @_;
 	my $upfx = $ctx->{mhref};
 	my $ibx = $ctx->{-inbox};
-	# $p - from msg_iter: [ Email::MIME, depth, @idx ]
-	my ($part, $depth, @idx) = @$p;
+	my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
+	# $p - from each_part: [ Email::MIME-like, depth, $idx ]
+	my ($part, $depth, $idx) = @$p;
 	my $ct = $part->content_type || 'text/plain';
 	my $fn = $part->filename;
 	my ($s, $err) = msg_part_text($part, $ct);
 	return attach_link($ctx, $ct, $p, $fn) unless defined $s;
 
+	my $rv = $ctx->{obuf};
+	if ($part->{is_submsg}) {
+		submsg_hdr($ctx, $part);
+		$$rv .= "\n";
+	}
+
 	# makes no difference to browsers, and don't screw up filename
 	# link generation in diffs with the extra '%0D'
 	$s =~ s/\r\n/\n/sg;
 
+	# will be escaped to `•' in HTML
+	obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate};
+
 	# always support diff-highlighting, but we can't linkify hunk
 	# headers for solver unless some coderepo are configured:
 	my $diff;
-	if ($s =~ /^(?:diff|---|\+{3}) /ms) {
-		# diffstat anchors do not link across attachments or messages:
-		$idx[0] = $upfx . $idx[0] if $upfx ne '';
-		$ctx->{-apfx} = join('/', @idx);
-		$ctx->{-anchors} = {}; # attr => filename
+	if ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ /ms) {
+		# diffstat anchors do not link across attachments or messages,
+		# -apfx is just a stable prefix for making diffstat anchors
+		# linkable to the first diff hunk w/o crossing attachments
+		$idx =~ tr!.!/!; # compatibility with previous versions
+		$ctx->{-apfx} = $upfx . $idx;
+
+		# do attr => filename mappings for diffstats in git diffs:
+		$ctx->{-anchors} = {} if $s =~ /^diff --git /sm;
 		$diff = 1;
 		delete $ctx->{-long_path};
 		my $spfx;
@@ -579,27 +600,27 @@ sub add_text_body { # callback for msg_iter
 
 	# split off quoted and unquoted blocks:
 	my @sections = PublicInbox::MsgIter::split_quotes($s);
-	$s = '';
-	my $rv = $ctx->{obuf};
-	if (defined($fn) || $depth > 0 || $err) {
+	undef $s; # free memory
+	if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
 		# badly-encoded message with $err? tell the world about it!
 		attach_link($ctx, $ct, $p, $fn, $err);
 		$$rv .= "\n";
 	}
-	my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
 	foreach my $cur (@sections) {
 		if ($cur =~ /\A>/) {
-			flush_quote($rv, $l, \$cur);
+			# we use a  here to allow users to specify
+			# their own color for quoted text
+			$$rv .= qq();
+			$$rv .= $l->to_html($cur);
+			$$rv .= '';
 		} elsif ($diff) {
-			flush_diff($rv, $ctx, \$cur);
+			flush_diff($ctx, \$cur);
 		} else {
 			# regular lines, OK
 			$$rv .= $l->to_html($cur);
-			$cur = undef;
 		}
+		undef $cur; # free memory
 	}
-
-	obfuscate_addrs($ibx, $$rv) if $ibx->{obfuscate};
 }
 
 sub _msg_page_prepare_obuf {
@@ -833,7 +854,7 @@ sub indent_for {
 sub find_mid_root {
 	my ($ctx, $level, $node, $idx) = @_;
 	++$ctx->{root_idx} if $level == 0;
-	if ($node->{id} eq $ctx->{mid}) {
+	if ($node->{mid} eq $ctx->{mid}) {
 		$ctx->{found_mid_at} = $ctx->{root_idx};
 		return 0;
 	}
@@ -890,25 +911,25 @@ sub missing_thread {
 sub dedupe_subject {
 	my ($prev_subj, $subj, $val) = @_;
 
-	my $omit = ''; # '"' denotes identical text omitted
+	my $omit; # '"' denotes identical text omitted
 	my (@prev_pop, @curr_pop);
 	while (@$prev_subj && @$subj && $subj->[-1] eq $prev_subj->[-1]) {
 		push(@prev_pop, pop(@$prev_subj));
 		push(@curr_pop, pop(@$subj));
-		$omit ||= $val;
+		$omit //= $val;
 	}
 	pop @$subj if @$subj && $subj->[-1] =~ /^re:\s*/i;
 	if (scalar(@curr_pop) == 1) {
-		$omit = '';
+		$omit = undef;
 		push @$prev_subj, @prev_pop;
 		push @$subj, @curr_pop;
 	}
-	$omit;
+	$omit // '';
 }
 
 sub skel_dump { # walk_thread callback
-	my ($ctx, $level, $node) = @_;
-	my $smsg = $node->{smsg} or return _skel_ghost($ctx, $level, $node);
+	my ($ctx, $level, $smsg) = @_;
+	$smsg->{blob} or return _skel_ghost($ctx, $level, $smsg);
 
 	my $skel = $ctx->{skel};
 	my $cur = $ctx->{cur};
@@ -991,7 +1012,7 @@ sub skel_dump { # walk_thread callback
 sub _skel_ghost {
 	my ($ctx, $level, $node) = @_;
 
-	my $mid = $node->{id};
+	my $mid = $node->{mid};
 	my $d = '     [not found] ';
 	$d .= '    '  if exists $ctx->{searchview};
 	$d .= indent_for($level) . th_pfx($level);
@@ -1014,18 +1035,23 @@ sub _skel_ghost {
 
 sub sort_ds {
 	[ sort {
-		(eval { $a->topmost->{smsg}->{ds} } || 0) <=>
-		(eval { $b->topmost->{smsg}->{ds} } || 0)
+		(eval { $a->topmost->{ds} } || 0) <=>
+		(eval { $b->topmost->{ds} } || 0)
 	} @{$_[0]} ];
 }
 
 # accumulate recent topics if search is supported
 # returns 200 if done, 404 if not
 sub acc_topic { # walk_thread callback
-	my ($ctx, $level, $node) = @_;
-	my $mid = $node->{id};
-	my $smsg = $node->{smsg} // $ctx->{-inbox}->smsg_by_mid($mid);
-	if ($smsg) {
+	my ($ctx, $level, $smsg) = @_;
+	my $mid = $smsg->{mid};
+	my $has_blob = $smsg->{blob} // do {
+		if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) {
+			%$smsg = (%$smsg, %$by_mid);
+			1;
+		}
+	};
+	if ($has_blob) {
 		my $subj = subject_normalized($smsg->{subject});
 		$subj = '(no subject)' if $subj eq '';
 		my $ds = $smsg->{ds};
@@ -1101,6 +1127,7 @@ sub dump_topics {
 			my @next_prev = @subj; # full copy
 			my $omit = dedupe_subject($prev_subj, \@subj, ' "');
 			$prev_subj = \@next_prev;
+			$subj = join(' ', @subj);
 			$subj = ascii_html($subj);
 			obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
 			$href = mid_href($mid);
@@ -1186,7 +1213,7 @@ sub index_topics {
 	if (@$msgs) {
 		walk_thread(thread_results($ctx, $msgs), $ctx, \&acc_topic);
 	}
-	PublicInbox::WwwStream->response($ctx, dump_topics($ctx), \&index_nav);
+	PublicInbox::WwwStream::response($ctx, dump_topics($ctx), \&index_nav);
 }
 
 sub thread_adj_level {
@@ -1216,7 +1243,7 @@ sub thread_adj_level {
 sub ghost_index_entry {
 	my ($ctx, $level, $node) = @_;
 	my ($beg, $end) = thread_adj_level($ctx,  $level);
-	$beg . '
'. ghost_parent($ctx->{-upfx}, $node->{id})
+	$beg . '
'. ghost_parent($ctx->{-upfx}, $node->{mid})
 		. '
' . $end; }