# Copyright (C) all contributors
# License: AGPL-3.0+
#
# Used for displaying the HTML web interface.
# See Documentation/design_www.txt for this.
package PublicInbox::View;
use strict;
use v5.10.1;
use List::Util qw(max);
use Text::Wrap qw(wrap); # stdlib, we need Perl 5.6+ for $huge
use PublicInbox::MsgTime qw(msg_datestamp);
use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl mid_href
ts2str fmt_ts);
use PublicInbox::Linkify;
use PublicInbox::MID qw(id_compress mids mids_for_index references
$MID_EXTRACT);
use PublicInbox::MsgIter;
use PublicInbox::Address;
use PublicInbox::WwwStream qw(html_oneshot);
use PublicInbox::Reply;
use PublicInbox::ViewDiff qw(flush_diff);
use PublicInbox::Eml;
use POSIX qw(strftime);
use Time::Local qw(timegm);
use PublicInbox::Smsg qw(subject_normalized);
use PublicInbox::ContentHash qw(content_hash);
# Layout constants: COLS is the column limit consulted when folding
# and laying out header lines; INDENT and TCHILD are prefixes for
# thread skeleton lines.
use constant {
	COLS => 72,
	INDENT => ' ',
	TCHILD => '` ',
};

# Prefix for a thread skeleton line: empty at level 0, TCHILD at any
# other level.
sub th_pfx ($) {
	my ($level) = @_;
	return '' if $level == 0;
	TCHILD;
}
# WwwStream callback for the single-message page.
# Without $eml it hands back the pending $ctx->{smsg} (possibly undef).
# With $eml it advances $ctx->{smsg} to the next message sharing the
# Message-ID, renders $eml, emits the footer once no further message is
# pending, and returns ''.
sub msg_page_i {
	my ($ctx, $eml) = @_;

	# called by WwwStream::async_next or getline
	return $ctx->{smsg} unless $eml; # may be undef

	# called by WwwStream::async_eml or getline
	my $cur = $ctx->{smsg};
	my $over = $ctx->{ibx}->over;
	if ($over) {
		$ctx->{smsg} = $over->next_by_mid(@{$ctx->{next_arg}});
	} else {
		$ctx->{smsg} = $ctx->gone('over');
	}

	# a per-message href is only needed when more than one message
	# is (or was) shown for this Message-ID
	my $multi = $ctx->{nr} || $ctx->{smsg};
	$ctx->{mhref} = $multi ? "../${\mid_href($cur->{mid})}/" : '';

	if (_msg_page_prepare($eml, $ctx)) {
		$eml->each_part(\&add_text_body, $ctx, 1);
		print { $ctx->{zfh} } '';
	}
	unless ($ctx->{smsg}) {
		html_footer($ctx, $ctx->{first_hdr});
	}
	return ''; # XXX TODO cleanup
}
# /$INBOX/$MSGID/ for unindexed v1 inboxes
# Render the message page straight from msg_by_mid.
# Returns nothing (404) when the message cannot be found; otherwise
# returns whatever $ctx->html_done returns.
sub no_over_html ($) {
	my ($ctx) = @_;
	my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid});
	return unless $bref; # 404

	my $eml = PublicInbox::Eml->new($bref);
	$ctx->{mhref} = '';
	PublicInbox::WwwStream::init($ctx);

	my $shown = _msg_page_prepare($eml, $ctx); # sets {-title_html}
	if ($shown) {
		$eml->each_part(\&add_text_body, $ctx, 1);
		print { $ctx->{zfh} } '';
	}
	html_footer($ctx, $eml);
	return $ctx->html_done;
}
# public functions: (unstable)
# Entry point for the single-message page.
# Falls back to no_over_html() when the inbox has no ->over, returns
# nothing (404) when the Message-ID is unknown, and otherwise hands
# control to WwwStream::aresponse with msg_page_i as the callback.
sub msg_page {
	my ($ctx) = @_;
	my $ibx = $ctx->{ibx};
	if ($ibx->{obfuscate}) {
		$ctx->{-obfs_ibx} = $ibx;
	} else {
		$ctx->{-obfs_ibx} = undef;
	}

	my $over = $ibx->over;
	return no_over_html($ctx) unless $over;

	# $id and $prev are iteration state shared with next_by_mid
	# through the references stored in {next_arg}
	my ($id, $prev);
	my $next_arg = [ $ctx->{mid}, \$id, \$prev ];
	$ctx->{next_arg} = $next_arg;

	my $smsg = $over->next_by_mid(@$next_arg);
	$ctx->{smsg} = $smsg;
	return unless $smsg; # undef == 404

	# allow user to easily browse the range around this message if
	# they have ->over
	$ctx->{-t_max} = $smsg->{ts};
	if ($ibx->{coderepo}) {
		$ctx->{-spfx} = '../';
	}
	PublicInbox::WwwStream::aresponse($ctx, \&msg_page_i);
}
# /$INBOX/$MESSAGE_ID/#R
# Build the "Reply instructions" text for a message.
# Arguments: $ctx (request context; {ibx} and {env} are read here) and
# $hdr (handed to PublicInbox::Reply::mailto_arg_link).
# When mailto_arg_link yields a SCALAR ref as its first value, the
# referenced text is escaped with ascii_html and returned in place of
# the instructions.
# NOTE(review): the heredoc openers in this sub do not look well-formed
# in this copy of the file -- confirm against the canonical source.
sub msg_reply ($$) {
my ($ctx, $hdr) = @_;
# external references interpolated into the instructions
my $se_url =
'https://kernel.org/pub/software/scm/git/docs/git-send-email.html';
my $p_url =
'https://en.wikipedia.org/wiki/Posting_style#Interleaved_style';
my $info = '';
my $ibx = $ctx->{ibx};
# optional "List information" line, only when the inbox sets infourl
if (my $url = $ibx->{infourl}) {
$url = prurl($ctx->{env}, $url);
$info = qq(\n List information: $url\n);
}
my ($arg, $link, $reply_to_all) =
PublicInbox::Reply::mailto_arg_link($ibx, $hdr);
# a SCALAR ref here carries text to show instead of instructions
if (ref($arg) eq 'SCALAR') {
return '
'.ascii_html($$arg).'
';
}
# mailto: link only works if address obfuscation is disabled
if ($link) {
$link = <In-Reply-To header
via mailto: links, try the mailto: link
EOF
}
push @$arg, '/path/to/YOUR_REPLY';
$arg = ascii_html(join(" \\\n ", '', @$arg));
<
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and $reply_to_all from there: mbox
Avoid top-posting and favor interleaved quoting:
$p_url
$info
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email$arg
$se_url
$link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
EOF
}
# Return the last entry of the list produced by references() for the
# given header object, or undef when that list is empty.
sub in_reply_to {
	my ($eml_hdr) = @_;
	my $all_refs = references($eml_hdr);
	return $all_refs->[-1];
}
# Fold a long address list onto several lines joined by "\n\t".
# The argument is modified in place (through $_[0]) and also returned.
# Strings no longer than COLS are returned untouched.
sub fold_addresses ($) {
	if (length($_[0]) > COLS) {
		# try to fold on commas after non-word chars before $lim
		# chars.  Try to get the "," preceded by ">" or ")", but
		# avoid folding on the comma where somebody uses
		# "Lastname, Firstname".  We also try to keep the last and
		# penultimate addresses in the list on the same line if
		# possible, hence the extra \z.
		# Fall back to folding on spaces at $lim + 1 chars
		my $lim = COLS - 8; # 8 = "\t" display width
		my $too_long = $lim + 1;

		# Email::Simple doesn't strip trailing spaces
		$_[0] =~ s/\s*\z//s;

		my @folded = ($_[0] =~ /(.{0,$lim}\W(?:,|\z)|
					.{1,$lim}(?:,|\z)|
					.{1,$lim}|
					.{$too_long,}?)(?:\s|\z)/xgo);
		$_[0] = join("\n\t", @folded);
	}
	$_[0];
}
# HTML-escaped, comma-separated display names from every occurrence of
# header $field in $hdr; '' when the header is absent.
sub _hdr_names_html ($$) {
	my ($hdr, $field) = @_;
	my @vals = $hdr->header($field);
	return '' unless @vals;
	my @names = PublicInbox::Address::names(join(',', @vals));
	ascii_html(join(', ', @names));
}
# Format a count with the matching noun, e.g. "1 reply" / "3 replies".
# A numerically zero count is always rendered as a literal "0".
sub nr_to_s ($$$) {
	my ($count, $one, $many) = @_;
	if ($count == 0) {
		return "0 $many";
	}
	my $noun = ($count == 1) ? $one : $many;
	"$count $noun";
}
# Displays the text of the message for the /$INBOX/$MSGID/[Tt]/ endpoint;
# this is already inside a <pre>
# Render one message entry.
# Arguments: $ctx (request context) and $eml (the parsed message).
# The index entry for the message is taken, and removed, from
# $ctx->{smsg}.  The header block built here is written to $ctx->zfh,
# body parts are rendered through add_text_body, and the footer string
# is returned to the caller.
# NOTE(review): several literals below are empty or split across lines
# in this copy, so markup appears to be missing -- confirm against the
# canonical source.
sub eml_entry {
my ($ctx, $eml) = @_;
my $smsg = delete $ctx->{smsg};
my $subj = delete $smsg->{subject};
my $mid_raw = $smsg->{mid};
my $id = id_compress($mid_raw, 1);
my $id_m = 'm'.$id;
my $root_anchor = $ctx->{root_anchor} || '';
# $irt may be filled in by _th_index_lite below
my $irt;
my $obfs_ibx = $ctx->{-obfs_ibx};
$subj = '(no subject)' if $subj eq '';
my $rv = "* ";
$subj = ''.ascii_html($subj).'';
obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
$subj = "$subj" if $root_anchor eq $id_m;
$rv .= $subj . "\n";
$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
my @tocc;
my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users
# Deleting these fields saves about 400K as we iterate across 1K msgs
delete @$smsg{qw(ts blob)};
my $from = _hdr_names_html($eml, 'From');
obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
$rv .= "From: $from @ ".fmt_ts($ds)." UTC";
my $upfx = $ctx->{-upfx};
my $mhref = $upfx . mid_href($mid_raw) . '/';
$rv .= qq{ (permalink / };
$rv .= qq{raw)\n};
my $to = fold_addresses(_hdr_names_html($eml, 'To'));
my $cc = fold_addresses(_hdr_names_html($eml, 'Cc'));
my ($tlen, $clen) = (length($to), length($cc));
my $to_cc = '';
# To and Cc share one line when their combined length fits in COLS
if (($tlen + $clen) > COLS) {
$to_cc .= ' To: '.$to."\n" if $tlen;
$to_cc .= ' Cc: '.$cc."\n" if $clen;
} else {
if ($tlen) {
$to_cc .= ' To: '.$to;
$to_cc .= '; +Cc: '.$cc if $clen;
} else {
$to_cc .= ' Cc: '.$cc if $clen;
}
$to_cc .= "\n";
}
obfuscate_addrs($obfs_ibx, $to_cc) if $obfs_ibx;
$rv .= $to_cc;
my $mapping = $ctx->{mapping};
# In-Reply-To is only shown when there is no thread mapping
if (!$mapping && (defined($irt) || defined($irt = in_reply_to($eml)))) {
my $href = $upfx . mid_href($irt) . '/';
my $html = ascii_html($irt);
$rv .= qq(In-Reply-To: <$html>\n)
}
# the header block goes out before the (expensive) body rendering
say { $ctx->zfh } $rv;
# scan through all parts, looking for displayable text
$ctx->{mhref} = $mhref;
$ctx->{changed_href} = "#e$id"; # for diffstat "files? changed,"
$eml->each_part(\&add_text_body, $ctx, 1); # expensive
# add the footer
$rv = "\n^ ".
"permalink" .
" raw" .
" reply";
delete($ctx->{-qry}) and
$rv .= qq[ related];
# $hr decides whether the trailing separator literal is appended
my $hr;
if (defined(my $pct = $smsg->{pct})) { # used by SearchView.pm
$rv .= "\t[relevance $pct%]";
$hr = 1;
} elsif ($mapping) {
my $nested = 'nested';
my $flat = 'flat';
if ($ctx->{flat}) {
$hr = 1;
$flat = "$flat";
} else {
$nested = "$nested";
}
$rv .= "\t[$flat";
$rv .= "|$nested]";
$rv .= " $ctx->{s_nr}";
} else {
$hr = $ctx->{-hr};
}
# do we have more messages? start a new
if so
$rv .= scalar(@{$ctx->{msgs}}) ? '
' : '
' if $hr;
$rv;
}
# Build an indented "(...)" line for the thread overview.
# Arguments: $mid (may be undef), $level (thread depth, passed to
# indent_for and th_pfx) and an optional label $s that defaults to '...'.
# NOTE(review): both branches of the ternary yield the same string as
# written; presumably the defined($mid) branch carried link markup that
# is missing from this copy -- confirm.
sub pad_link ($$;$) {
my ($mid, $level, $s) = @_;
$s ||= '...';
my $href = defined($mid) ?
("($s)\n") :
"($s)\n";
(' 'x19).indent_for($level).th_pfx($level).$href;
}
# Look up the first element of the mapping entry for a Message-ID.
# Called as _skel_hdr($mapping, $mid); yields "(?)\n" when $mid is undef
# or has no entry in $mapping.
sub _skel_hdr {
	# an undef $mid is replaced by a reference, whose stringified
	# form is used as a key that is not expected to be in the mapping
	my $key = $_[1] // \'bogus';
	my $entry = $_[0]->{$key} // [ "(?)\n" ];
	$entry->[0];
}
# Build the condensed thread overview shown with a message: the parent
# line, nearby preceding siblings, the message itself ("@ ..."), its
# first replies, following siblings and a summary line.
#
# Arguments:
#   $mid_raw - Message-ID used as the key into $ctx->{mapping}
#   $irt     - scalar ref; set from the node's references (via
#              $MID_EXTRACT) when the node has any
#   $id      - not referenced in the body as written
#   $ctx     - request context; {mapping} and {s_nr} are read
#
# Returns '' when $ctx->{mapping} is unset, a "BUG" notice when
# $mid_raw has no mapping entry, and the overview text otherwise.
sub _th_index_lite {
	my ($mid_raw, $irt, $id, $ctx) = @_;
	my $rv = '';
	my $mapping = $ctx->{mapping} or return $rv;
	my $pad = ' ';
	my $mid_map = $mapping->{$mid_raw} //
		return 'public-inbox BUG: '.ascii_html($mid_raw).' not mapped';
	my ($attr, $node, $idx, $level) = @$mid_map;
	my $children = $node->{children};
	my $nr_c = scalar @$children;
	my $nr_s = 0;
	my $siblings;
	# delete saves about 200KB on a 1K message thread
	if (my $refs = delete $node->{references}) {
		($$irt) = ($refs =~ m/$MID_EXTRACT\z/o);
	}
	# explicit ternary: "my $x = ... if COND" has undefined behaviour
	# according to perlsyn, so never declare with a statement modifier
	my $irt_map = defined($$irt) ? $mapping->{$$irt} : undef;
	if (defined $irt_map) {
		$siblings = $irt_map->[1]->{children};
		$nr_s = scalar(@$siblings) - 1;
		$rv .= $pad . $irt_map->[0];
		if ($idx > 0) {
			my $prev = $siblings->[$idx - 1];
			my $pmid = $prev->{mid};
			if ($idx > 2) {
				my $s = ($idx - 1). ' preceding siblings ...';
				$rv .= pad_link($pmid, $level, $s);
			} elsif ($idx == 2) {
				$rv .= $pad . _skel_hdr($mapping,
						$siblings->[0] ?
						$siblings->[0]->{mid} : undef);
			}
			$rv .= $pad . _skel_hdr($mapping, $pmid);
		}
	}
	my $s_s = nr_to_s($nr_s, 'sibling', 'siblings');
	my $s_c = nr_to_s($nr_c, 'reply', 'replies');
	chop $attr; # remove "\n"
	$attr =~ s! (?:" )?!!s; # no point in dup subject
	$attr =~ s!]+>([^<]+)!$1!s; # no point linking to self
	$rv .= "@ $attr\n";
	if ($nr_c) {
		my $cmid = $children->[0] ? $children->[0]->{mid} : undef;
		$rv .= $pad . _skel_hdr($mapping, $cmid);
		if ($nr_c > 2) {
			my $s = ($nr_c - 1). ' more replies';
			$rv .= pad_link($cmid, $level + 1, $s);
		} elsif (my $cn = $children->[1]) {
			$rv .= $pad . _skel_hdr($mapping, $cn->{mid});
		}
	}
	# same statement-modifier hazard as $irt_map above
	my $next = ($siblings && $idx >= 0) ? $siblings->[$idx+1] : undef;
	if ($next) {
		my $nmid = $next->{mid};
		$rv .= $pad . _skel_hdr($mapping, $nmid);
		my $nnext = $nr_s - $idx;
		if ($nnext > 2) {
			my $s = ($nnext - 1).' subsequent siblings';
			$rv .= pad_link($nmid, $level, $s);
		} elsif (my $nn = $siblings->[$idx + 2]) {
			$rv .= $pad . _skel_hdr($mapping, $nn->{mid});
		}
	}
	$rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n";
}
# non-recursive thread walker
# Depth-first walk over a thread forest without recursion.
# $cb is invoked as $cb->($ctx, $level, $node, $index) for every defined
# node; roots get level 0 and index -1, children get their position in
# the parent's {children} array.  A false return from $cb stops the walk.
sub walk_thread ($$$) {
	my ($rootset, $ctx, $cb) = @_;
	# flat work list of (level, node, index) triples
	my @todo;
	push @todo, 0, $_, -1 for @$rootset;
	while (@todo) {
		my ($depth, $cur, $pos) = splice(@todo, 0, 3);
		next unless defined $cur;
		return unless $cb->($ctx, $depth, $cur, $pos);
		my $child_depth = $depth + 1;
		my $nth = 0;
		# children go to the front so they are visited before the
		# remaining siblings of $cur
		my @kids = map {
			($child_depth, $_, $nth++)
		} @{$cur->{children}};
		unshift @todo, @kids;
	}
}
# Records [ '', $node, $idx, $level ] in $ctx->{mapping} under the
# node's Message-ID, then passes the node on to skel_dump and returns
# its result.
sub pre_thread { # walk_thread callback
	my ($ctx, $level, $node, $idx) = @_;
	my $mid = $node->{mid};
	my $entry = [ '', $node, $idx, $level ];
	$ctx->{mapping}->{$mid} = $entry;
	return skel_dump($ctx, $level, $node);
}
# Emit one message of a thread.
# thread_adj_level supplies the text to print before the entry ($beg)
# and the text returned to the caller after it ($end); eml_entry output
# is printed in between.
# NOTE(review): the literals printed around the entry contain only a
# newline in this copy; markup may be missing -- confirm.
sub thread_eml_entry {
my ($ctx, $eml) = @_;
my ($beg, $end) = thread_adj_level($ctx, $ctx->{level});
print { $ctx->zfh } $beg, '
';
print { $ctx->{zfh} } eml_entry($ctx, $eml), '
';
$end;
}
# Pop the next (level, smsg) pair from the flat queue $q, pushing the
# popped node's children (one level deeper) onto the front of the queue.
# Nodes without a {blob} key are skipped unless $ghost_ok is true.
# Returns ($level, $smsg), or undef once the queue is exhausted.
sub next_in_queue ($$) {
	my ($q, $ghost_ok) = @_;
	while (@$q) {
		my ($level, $smsg) = splice(@$q, 0, 2);
		my @kids = map { ($level + 1, $_) } @{$smsg->{children}};
		unshift @$q, @kids;
		next unless $ghost_ok || exists($smsg->{blob});
		return ($level, $smsg);
	}
	undef;
}
# With $eml: render that message via thread_eml_entry.
# Without $eml: return the next queued message that has a {blob},
# printing ghost entries met on the way.  Once the queue is empty,
# print the closing levels and the skeleton, then return nothing.
sub stream_thread_i { # PublicInbox::WwwStream::getline callback
	my ($ctx, $eml) = @_;
	if ($eml) {
		return thread_eml_entry($ctx, $eml);
	}
	exists($ctx->{skel}) or return;
	# false only on the first call, so the first message returned is
	# never a ghost
	my $ghost_ok = $ctx->{nr}++;
	for (;;) {
		my ($lvl, $smsg) = next_in_queue($ctx->{-queue}, $ghost_ok);
		if (!$smsg) { # all done
			print { $ctx->zfh } thread_adj_level($ctx, 0),
						${delete($ctx->{skel})};
			return;
		}
		if (!exists $smsg->{blob}) {
			# buffer the ghost entry and loop
			print { $ctx->zfh } ghost_index_entry($ctx, $lvl, $smsg);
			next;
		}
		# next message for cat-file
		$ctx->{level} = $lvl;
		if (!$ghost_ok) { # first non-ghost
			$ctx->{-title_html} = ascii_html($smsg->{subject});
			print { $ctx->zfh } $ctx->html_top;
		}
		return $smsg;
	}
}
# Seed $ctx->{-queue} with every root at level 0 and start the response
# with stream_thread_i as the callback.
sub stream_thread ($$) {
	my ($rootset, $ctx) = @_;
	my @pairs;
	push @pairs, 0, $_ for @$rootset;
	@{$ctx->{-queue}} = @pairs;
	return PublicInbox::WwwStream::aresponse($ctx, \&stream_thread_i);
}
# /$INBOX/$MSGID/t/ and /$INBOX/$MSGID/T/
# Entry point for the thread views.
# Fetches the thread containing $ctx->{mid} from ->over, returns
# missing_thread($ctx) when it is empty, and otherwise streams either
# the nested view (stream_thread) or, when $ctx->{flat} is set, the flat
# view (thread_html_i).
# NOTE(review): $rootset is passed to stream_thread below but is not
# declared anywhere in this sub as shown, and $skel / $t / $t_fmt are
# assigned without being used -- code appears to be missing from this
# copy; confirm against the canonical source.
sub thread_html {
my ($ctx) = @_;
$ctx->{-upfx} = '../../';
my $mid = $ctx->{mid};
my $ibx = $ctx->{ibx};
my ($nr, $msgs) = $ibx->over->get_thread($mid);
return missing_thread($ctx) if $nr == 0;
$ctx->{-spfx} = '../../' if $ibx->{coderepo};
# link $INBOX_DIR/description text to "index_topics" view around
# the newest message in this thread
# (the {ts} field is removed from every message while taking the max)
my $t = ts2str($ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs));
my $t_fmt = fmt_ts($ctx->{-t_max});
my $skel = '
';
return stream_thread($rootset, $ctx) unless $ctx->{flat};
# flat display: lazy load the full message from smsg
$ctx->{msgs} = $msgs;
$ctx->{-html_tip} = '
';
PublicInbox::WwwStream::aresponse($ctx, \&thread_html_i);
}
# With $eml: render it via eml_entry, printing the page top first while
# {-html_tip} still exists in $ctx.
# Without $eml: return the next entry of $ctx->{msgs} that has a {blob};
# when none is left, print the skeleton once and return undef.
sub thread_html_i { # PublicInbox::WwwStream::getline callback
	my ($ctx, $eml) = @_;
	if ($eml) {
		my $cur = $ctx->{smsg};
		if (exists $ctx->{-html_tip}) {
			$ctx->{-title_html} = ascii_html($cur->{subject});
			print { $ctx->zfh } $ctx->html_top;
		}
		return eml_entry($ctx, $eml);
	}
	while (my $cand = shift @{$ctx->{msgs}}) {
		next unless exists($cand->{blob});
		return $cand;
	}
	my $skel = delete($ctx->{skel});
	return unless $skel; # all done
	print { $ctx->zfh } $$skel;
	undef;
}
# Header summary for a message/rfc822 sub-message: a leading blank line
# followed by one "Name: value" line per occurrence of the selected
# headers, address-obfuscated when $ctx->{-obfs_ibx} is set, then
# HTML-escaped.
sub submsg_hdr ($$) {
	my ($ctx, $eml) = @_;
	my @shown = qw(From To Cc Subject Date Message-ID X-Alt-Message-ID);
	my $txt = "\n";
	for my $name (@shown) {
		for my $val ($eml->header($name)) {
			$txt .= "$name: $val\n";
		}
	}
	if ($ctx->{-obfs_ibx}) {
		obfuscate_addrs($ctx->{-obfs_ibx}, $txt);
	}
	ascii_html($txt);
}
# Build the "[-- ... --]" attachment banner for one MIME part.
# Arguments: $ctx (request context), $ct (content type), $p (the
# [ part, depth, idx ] triple from each_part), $fn (filename, may be
# undef) and an optional $err flag that keeps content-type attributes
# visible.
# Returns nothing for parts whose {bdy} is empty.
# NOTE(review): the two statements that append the link and description
# are garbled in this copy, and $desc is used without a visible
# declaration -- confirm against the canonical source.
sub attach_link ($$$$;$) {
my ($ctx, $ct, $p, $fn, $err) = @_;
my ($part, $depth, $idx) = @$p;
# Eml iteration clobbers multipart ->{bdy}, so do not offer
# downloads for 0-byte multipart attachments
return unless $part->{bdy};
my $size = length($part->body);
delete $part->{bdy}; # save memory
# hide attributes normally, unless we want to aid users in
# spotting MUA problems:
$ct =~ s/;.*// unless $err;
$ct = ascii_html($ct);
# $sfn is the file name used in the link: the given name when it
# matches $PublicInbox::Hval::FN, a generic fallback otherwise
my $sfn;
if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) {
$sfn = $fn;
} elsif ($ct eq 'text/plain') {
$sfn = 'a.txt';
} else {
$sfn = 'a.bin';
}
my $rv = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
$rv .= qq({mhref}$idx-$sfn">);
$rv .= <header('Content-Description') // $fn // '';
$rv .= ascii_html($desc)." --]\n[-- " if $desc ne '';
$rv .= "Type: $ct, Size: $size bytes --]\n";
$rv .= submsg_hdr($ctx, $part) if $part->{is_submsg};
$rv;
}
# Render one MIME part to $ctx->zfh.
# Parts for which msg_part_text yields no text get only an attachment
# banner.  Text parts are split into quoted and unquoted sections:
# quoted sections and plain sections go through the linkifier, and
# sections of a part that looks like a diff go through flush_diff.
sub add_text_body { # callback for each_part
my ($p, $ctx) = @_;
my $upfx = $ctx->{mhref};
my $ibx = $ctx->{ibx};
# one Linkify object is created lazily and reused across parts
my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
# $p - from each_part: [ Email::MIME-like, depth, $idx ]
my ($part, $depth, $idx) = @$p;
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
my ($s, $err) = msg_part_text($part, $ct);
my $zfh = $ctx->zfh;
# no text for this part: print the attachment banner (if any) and stop
$s // return print $zfh (attach_link($ctx, $ct, $p, $fn) // '');
say $zfh submsg_hdr($ctx, $part) if $part->{is_submsg};
# makes no difference to browsers, and don't screw up filename
# link generation in diffs with the extra '%0D'
$s =~ s/\r+\n/\n/sg;
# will be escaped to `•' in HTML
obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate};
# always support diff-highlighting, but we can't linkify hunk
# headers for solver unless some coderepo are configured:
my $diff;
if ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ /ms) {
# diffstat anchors do not link across attachments or messages,
# -apfx is just a stable prefix for making diffstat anchors
# linkable to the first diff hunk w/o crossing attachments
$idx =~ tr!.!/!; # compatibility with previous versions
$ctx->{-apfx} = $upfx . $idx;
# do attr => filename mappings for diffstats in git diffs:
$ctx->{-anchors} = {} if $s =~ /^diff --git /sm;
$diff = 1;
delete $ctx->{-long_path};
};
# split off quoted and unquoted blocks:
my @sections = PublicInbox::MsgIter::split_quotes($s);
undef $s; # free memory
if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
# badly-encoded message with $err? tell the world about it!
say $zfh attach_link($ctx, $ct, $p, $fn, $err);
}
delete $part->{bdy}; # save memory
for my $cur (@sections) { # $cur may be huge
if ($cur =~ /\A>/) {
# we use a here to allow users to specify
# their own color for quoted text
# NOTE(review): the wrapper literals below are empty in this
# copy; markup may be missing -- confirm.
print $zfh qq(),
$l->to_html($cur), '';
} elsif ($diff) {
flush_diff($ctx, \$cur);
} else { # regular lines, OK
print $zfh $l->to_html($cur);
}
undef $cur; # free memory
}
}
sub _msg_page_prepare {
my ($eml, $ctx) = @_;
my $have_over = !!$ctx->{ibx}->over;
my $mids = mids_for_index($eml);
my $nr = $ctx->{nr}++;
if ($nr) { # unlikely
if ($ctx->{chash} eq content_hash($eml)) {
warn "W: BUG? @$mids not deduplicated properly\n";
return;
}
$ctx->{-html_tip} =
qq[
WARNING: multiple messages have this Message-ID (diff)