X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FView.pm;h=98445f0e0b0f60ad0a1ee07b28fb4b3fe1dec19b;hb=0179230221231f8f5a3edc2f2836cc7c7e089dda;hp=89174296335ab594cfac100db4d87664d506afc4;hpb=906393b801050e303d2ec2a660c85de4a5fa4740;p=public-inbox.git diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 89174296..98445f0e 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -17,6 +17,7 @@ use PublicInbox::Address; use PublicInbox::WwwStream; use PublicInbox::Reply; use PublicInbox::ViewDiff qw(flush_diff); +use PublicInbox::Eml; use POSIX qw(strftime); use Time::Local qw(timegm); use PublicInbox::Smsg qw(subject_normalized); @@ -26,59 +27,56 @@ use constant TCHILD => '` '; sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD }; sub msg_page_i { - my ($nr, $ctx) = @_; - if (my $more = delete $ctx->{more}) { # unlikely - # fake an EOF if $more retrieval fails; - eval { msg_page_more($ctx, $nr, @$more) }; - } elsif (my $hdr = delete $ctx->{hdr}) { - # fake an EOF if generating the footer fails; - # we want to at least show the message if something - # here crashes: - eval { html_footer($ctx, $hdr) }; - } else { - undef + my ($ctx, $eml) = @_; + if ($eml) { # called by WwwStream::async_eml or getline + my $smsg = $ctx->{smsg}; + $ctx->{smsg} = $ctx->{over}->next_by_mid(@{$ctx->{next_arg}}); + $ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ? + "../${\mid_href($smsg->{mid})}/" : ''; + my $hdr = $eml->header_obj; + my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($hdr, $ctx); + multipart_text_as_html($eml, $ctx); + delete $ctx->{obuf}; + $$obuf .= '
-sub index_entry { - my ($smsg, $ctx, $more) = @_; - my $subj = $smsg->subject; - my $mid_raw = $smsg->mid; +sub eml_entry { + my ($ctx, $smsg, $eml, $more) = @_; + my $subj = delete $smsg->{subject}; + my $mid_raw = $smsg->{mid}; my $id = id_compress($mid_raw, 1); my $id_m = 'm'.$id; - my $root_anchor = $ctx->{root_anchor} || ''; my $irt; my $obfs_ibx = $ctx->{-obfs_ibx}; @@ -200,12 +198,12 @@ sub index_entry { $rv .= $subj . "\n"; $rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx); my @tocc; - my $ds = $smsg->ds; # for v1 non-Xapian/SQLite users - # deleting {mime} is critical to memory use, - # the rest of the fields saves about 400K as we iterate across 1K msgs - my ($mime) = delete @$smsg{qw(mime ds ts blob subject)}; + my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users + + # Deleting these fields saves about 400K as we iterate across 1K msgs + delete @$smsg{qw(ts blob)}; - my $hdr = $mime->header_obj; + my $hdr = $eml->header_obj; my $from = _hdr_names_html($hdr, 'From'); obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx; $rv .= "From: $from @ ".fmt_ts($ds)." UTC"; @@ -243,7 +241,7 @@ sub index_entry { # scan through all parts, looking for displayable text $ctx->{mhref} = $mhref; $ctx->{obuf} = \$rv; - msg_iter($mime, \&add_text_body, $ctx, 1); + $eml->each_part(\&add_text_body, $ctx, 1); delete $ctx->{obuf}; # add the footer @@ -297,11 +295,9 @@ sub _th_index_lite { my $nr_c = scalar @$children; my $nr_s = 0; my $siblings; - if (my $smsg = $node->{smsg}) { - # delete saves about 200KB on a 1K message thread - if (my $refs = delete $smsg->{references}) { - ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); - } + # delete saves about 200KB on a 1K message thread + if (my $refs = delete $node->{references}) { + ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); } my $irt_map = $mapping->{$$irt} if defined $$irt; if (defined $irt_map) { @@ -310,12 +306,12 @@ sub _th_index_lite { $rv .= $pad . $irt_map->[0]; if ($idx > 0) { my $prev = $siblings->[$idx - 1]; - my $pmid = $prev->{id}; + my $pmid = $prev->{mid}; if ($idx > 2) { my $s = ($idx - 1). ' preceding siblings ...'; $rv .= pad_link($pmid, $level, $s); } elsif ($idx == 2) { - my $ppmid = $siblings->[0]->{id}; + my $ppmid = $siblings->[0]->{mid}; $rv .= $pad . $mapping->{$ppmid}->[0]; } $rv .= $pad . $mapping->{$pmid}->[0]; @@ -324,30 +320,30 @@ sub _th_index_lite { my $s_s = nr_to_s($nr_s, 'sibling', 'siblings'); my $s_c = nr_to_s($nr_c, 'reply', 'replies'); $attr =~ s!\n\z!\n!s; - $attr =~ s! !!s; # no point in duplicating subject + $attr =~ s! (?:" )?!!s; # no point in dup subject $attr =~ s!]+>([^<]+)!$1!s; # no point linking to self $rv .= "@ $attr"; if ($nr_c) { - my $cmid = $children->[0]->{id}; + my $cmid = $children->[0]->{mid}; $rv .= $pad . $mapping->{$cmid}->[0]; if ($nr_c > 2) { my $s = ($nr_c - 1). ' more replies'; $rv .= pad_link($cmid, $level + 1, $s); } elsif (my $cn = $children->[1]) { - $rv .= $pad . $mapping->{$cn->{id}}->[0]; + $rv .= $pad . $mapping->{$cn->{mid}}->[0]; } } my $next = $siblings->[$idx+1] if $siblings && $idx >= 0; if ($next) { - my $nmid = $next->{id}; + my $nmid = $next->{mid}; $rv .= $pad . $mapping->{$nmid}->[0]; my $nnext = $nr_s - $idx; if ($nnext > 2) { my $s = ($nnext - 1).' subsequent siblings'; $rv .= pad_link($nmid, $level, $s); } elsif (my $nn = $siblings->[$idx + 2]) { - $rv .= $pad . $mapping->{$nn->{id}}->[0]; + $rv .= $pad . $mapping->{$nn->{mid}}->[0]; } } $rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n"; @@ -369,53 +365,51 @@ sub walk_thread ($$$) { sub pre_thread { # walk_thread callback my ($ctx, $level, $node, $idx) = @_; - $ctx->{mapping}->{$node->{id}} = [ '', $node, $idx, $level ]; + $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ]; skel_dump($ctx, $level, $node); } -sub thread_index_entry { - my ($ctx, $level, $smsg) = @_; +sub thread_eml_entry { + my ($ctx, $level, $smsg, $eml) = @_; my ($beg, $end) = thread_adj_level($ctx, $level); - $beg . '' . index_entry($smsg, $ctx, 0) . '' . $end; + $beg . '' . eml_entry($ctx, $smsg, $eml, 0) . '' . $end; } -sub stream_thread_i { # PublicInbox::WwwStream::getline callback - my ($nr, $ctx) = @_; - return unless exists($ctx->{skel}); - my $q = $ctx->{-queue}; +sub next_in_queue ($;$) { + my ($q, $ghost_ok) = @_; while (@$q) { - my $level = shift @$q; - my $node = shift @$q or next; + my ($level, $smsg) = splice(@$q, 0, 2); my $cl = $level + 1; - unshift @$q, map { ($cl, $_) } @{$node->{children}}; - if (my $smsg = $ctx->{-inbox}->smsg_mime($node->{smsg})) { - return thread_index_entry($ctx, $level, $smsg); - } else { - return ghost_index_entry($ctx, $level, $node); - } + unshift @$q, map { ($cl, $_) } @{$smsg->{children}}; + return ($level, $smsg) if $ghost_ok || exists($smsg->{blob}); } - join('', thread_adj_level($ctx, 0)) . ${delete $ctx->{skel}}; + undef; } -sub stream_thread ($$) { - my ($rootset, $ctx) = @_; - my $ibx = $ctx->{-inbox}; - my @q = map { (0, $_) } @$rootset; - my ($smsg, $level); - while (@q) { - $level = shift @q; - my $node = shift @q or next; - my $cl = $level + 1; - unshift @q, map { ($cl, $_) } @{$node->{children}}; - $smsg = $ibx->smsg_mime($node->{smsg}) and last; +sub stream_thread_i { # PublicInbox::WwwStream::getline callback + my ($ctx) = @_; + return unless exists($ctx->{skel}); + my $nr = $ctx->{nr}++; + my ($level, $smsg) = next_in_queue($ctx->{-queue}, $nr); + + $smsg or return + join('', thread_adj_level($ctx, 0)) . ${delete $ctx->{skel}}; + + my $eml = $ctx->{-inbox}->smsg_eml($smsg) or return + ghost_index_entry($ctx, $level, $smsg); + + if ($nr == 0) { + $ctx->{-title_html} = ascii_html($smsg->{subject}); + $ctx->html_top . thread_eml_entry($ctx, $level, $smsg, $eml); + } else { + thread_eml_entry($ctx, $level, $smsg, $eml); } - return missing_thread($ctx) unless $smsg; +} - $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; - $ctx->{-title_html} = ascii_html($smsg->{subject}); - $ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg); - $ctx->{-queue} = \@q; - PublicInbox::WwwStream->response($ctx, 200, \&stream_thread_i); +sub stream_thread ($$) { + my ($rootset, $ctx) = @_; + $ctx->{-queue} = [ map { (0, $_) } @$rootset ]; + PublicInbox::WwwStream::response($ctx, 200, \&stream_thread_i); } # /$INBOX/$MESSAGE_ID/t/ @@ -453,60 +447,64 @@ sub thread_html { return stream_thread($rootset, $ctx) unless $ctx->{flat}; # flat display: lazy load the full message from smsg - my $smsg; - while (my $m = shift @$msgs) { - $smsg = $ibx->smsg_mime($m) and last; - } - return missing_thread($ctx) unless $smsg; - $ctx->{-title_html} = ascii_html($smsg->{subject}); - $ctx->{-html_tip} = ''.index_entry($smsg, $ctx, scalar @$msgs); $ctx->{msgs} = $msgs; - PublicInbox::WwwStream->response($ctx, 200, \&thread_html_i); + $ctx->{-html_tip} = ''; + PublicInbox::WwwStream::response($ctx, 200, \&thread_html_i); } sub thread_html_i { # PublicInbox::WwwStream::getline callback - my ($nr, $ctx) = @_; + my ($ctx) = @_; my $msgs = $ctx->{msgs} or return; while (my $smsg = shift @$msgs) { - $ctx->{-inbox}->smsg_mime($smsg) or next; - return index_entry($smsg, $ctx, scalar @$msgs); + my $eml = $ctx->{-inbox}->smsg_eml($smsg) or next; + if (exists $ctx->{-html_tip}) { + $ctx->{-title_html} = ascii_html($smsg->{subject}); + return $ctx->html_top . + eml_entry($ctx, $smsg, $eml, scalar @$msgs); + } + return eml_entry($ctx, $smsg, $eml, scalar @$msgs); } my ($skel) = delete @$ctx{qw(skel msgs)}; $$skel; } sub multipart_text_as_html { - # ($mime, $ctx) = @_; # msg_iter will do "$_[0] = undef" + # ($mime, $ctx) = @_; # each_part may do "$_[0] = undef" # scan through all parts, looking for displayable text - msg_iter($_[0], \&add_text_body, $_[1], 1); + $_[0]->each_part(\&add_text_body, $_[1], 1); } -sub flush_quote { - my ($s, $l, $quot) = @_; - - my $rv = $l->to_html($$quot); - - # we use a here to allow users to specify their own - # color for quoted text - $$quot = undef; - $$s .= qq() . $rv . '' +sub submsg_hdr ($$) { + my ($ctx, $eml) = @_; + my $obfs_ibx = $ctx->{-obfs_ibx}; + my $rv = $ctx->{obuf}; + $$rv .= "\n"; + for my $h (qw(From To Cc Subject Date Message-ID X-Alt-Message-ID)) { + my @v = $eml->header($h); + for my $v (@v) { + obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; + $v = ascii_html($v); + $$rv .= "$h: $v\n"; + } + } } sub attach_link ($$$$;$) { my ($ctx, $ct, $p, $fn, $err) = @_; - my ($part, $depth, @idx) = @$p; - my $nl = $idx[-1] > 1 ? "\n" : ''; - my $idx = join('.', @idx); + my ($part, $depth, $idx) = @$p; + + # Eml iteration clobbers multipart ->{bdy}, so do not offer + # downloads for 0-byte multipart attachments + return unless $part->{bdy}; + + my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...) my $size = bytes::length($part->body); # hide attributes normally, unless we want to aid users in # spotting MUA problems: $ct =~ s/;.*// unless $err; $ct = ascii_html($ct); - my $desc = $part->header('Content-Description'); - $desc = $fn unless defined $desc; - $desc = '' unless defined $desc; my $sfn; if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) { $sfn = $fn; @@ -518,39 +516,59 @@ sub attach_link ($$$$;$) { my $rv = $ctx->{obuf}; $$rv .= qq($nl{mhref}$idx-$sfn">); if ($err) { - $$rv .= "[-- Warning: decoded text below may be mangled --]\n"; + $$rv .= <
header('Content-Description') // $fn // ''; $desc = ascii_html($desc); $$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]"; $$rv .= " \n"; + + submsg_hdr($ctx, $part) if $part->{is_submsg}; + undef; } -sub add_text_body { # callback for msg_iter +sub add_text_body { # callback for each_part my ($p, $ctx) = @_; my $upfx = $ctx->{mhref}; my $ibx = $ctx->{-inbox}; - # $p - from msg_iter: [ Email::MIME, depth, @idx ] - my ($part, $depth, @idx) = @$p; + my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new; + # $p - from each_part: [ Email::MIME-like, depth, $idx ] + my ($part, $depth, $idx) = @$p; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; my ($s, $err) = msg_part_text($part, $ct); return attach_link($ctx, $ct, $p, $fn) unless defined $s; + my $rv = $ctx->{obuf}; + if ($part->{is_submsg}) { + submsg_hdr($ctx, $part); + $$rv .= "\n"; + } + # makes no difference to browsers, and don't screw up filename # link generation in diffs with the extra '%0D' $s =~ s/\r\n/\n/sg; + # will be escaped to `•' in HTML + obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate}; + # always support diff-highlighting, but we can't linkify hunk # headers for solver unless some coderepo are configured: my $diff; - if ($s =~ /^(?:diff|---|\+{3}) /ms) { - # diffstat anchors do not link across attachments or messages: - $idx[0] = $upfx . $idx[0] if $upfx ne ''; - $ctx->{-apfx} = join('/', @idx); - $ctx->{-anchors} = {}; # attr => filename + if ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ /ms) { + # diffstat anchors do not link across attachments or messages, + # -apfx is just a stable prefix for making diffstat anchors + # linkable to the first diff hunk w/o crossing attachments + $idx =~ tr!.!/!; # compatibility with previous versions + $ctx->{-apfx} = $upfx . $idx; + + # do attr => filename mappings for diffstats in git diffs: + $ctx->{-anchors} = {} if $s =~ /^diff --git /sm; $diff = 1; delete $ctx->{-long_path}; my $spfx; @@ -576,48 +594,48 @@ sub add_text_body { # callback for msg_iter $s .= "\n" unless $s =~ /\n\z/s; # split off quoted and unquoted blocks: - my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); - $s = ''; - my $rv = $ctx->{obuf}; - if (defined($fn) || $depth > 0 || $err) { + my @sections = PublicInbox::MsgIter::split_quotes($s); + undef $s; # free memory + if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) { # badly-encoded message with $err? tell the world about it! attach_link($ctx, $ct, $p, $fn, $err); $$rv .= "\n"; } - my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new; foreach my $cur (@sections) { if ($cur =~ /\A>/) { - flush_quote($rv, $l, \$cur); + # we use a here to allow users to specify + # their own color for quoted text + $$rv .= qq(); + $$rv .= $l->to_html($cur); + $$rv .= ''; } elsif ($diff) { - flush_diff($rv, $ctx, \$cur); + flush_diff($ctx, \$cur); } else { # regular lines, OK $$rv .= $l->to_html($cur); - $cur = undef; } + undef $cur; # free memory } - - obfuscate_addrs($ibx, $$rv) if $ibx->{obfuscate}; } sub _msg_page_prepare_obuf { - my ($hdr, $ctx, $nr) = @_; + my ($hdr, $ctx) = @_; my $over = $ctx->{-inbox}->over; my $obfs_ibx = $ctx->{-obfs_ibx}; my $rv = ''; my $mids = mids_for_index($hdr); - if ($nr == 0) { - if ($ctx->{more}) { + my $nr = $ctx->{nr}++; + if ($nr) { # unlikely + $rv .= ''; + } else { + $ctx->{first_hdr} = $hdr; + if ($ctx->{smsg}) { $rv .= "WARNING: multiple messages have this Message-ID\n"; } $rv .= ""; # anchor for body start - } else { - $rv .= ''; - } - if ($over) { - $ctx->{-upfx} = '../'; } + $ctx->{-upfx} = '../' if $over; my @title; # (Subject[0], From[0]) for my $v ($hdr->header('From')) { my @n = PublicInbox::Address::names($v); @@ -658,7 +676,10 @@ sub _msg_page_prepare_obuf { obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P $rv .= "Date: $v\n"; } - $ctx->{-title_html} = join(' - ', @title); + if (!$nr) { # first (and only) message, common case + $ctx->{-title_html} = join(' - ', @title); + $rv = $ctx->html_top . $rv; + } if (scalar(@$mids) == 1) { # common case my $mhtml = ascii_html($mids->[0]); $rv .= "Message-ID: <$mhtml> "; @@ -831,7 +852,7 @@ sub indent_for { sub find_mid_root { my ($ctx, $level, $node, $idx) = @_; ++$ctx->{root_idx} if $level == 0; - if ($node->{id} eq $ctx->{mid}) { + if ($node->{mid} eq $ctx->{mid}) { $ctx->{found_mid_at} = $ctx->{root_idx}; return 0; } @@ -888,25 +909,25 @@ sub missing_thread { sub dedupe_subject { my ($prev_subj, $subj, $val) = @_; - my $omit = ''; # '"' denotes identical text omitted + my $omit; # '"' denotes identical text omitted my (@prev_pop, @curr_pop); while (@$prev_subj && @$subj && $subj->[-1] eq $prev_subj->[-1]) { push(@prev_pop, pop(@$prev_subj)); push(@curr_pop, pop(@$subj)); - $omit ||= $val; + $omit //= $val; } pop @$subj if @$subj && $subj->[-1] =~ /^re:\s*/i; if (scalar(@curr_pop) == 1) { - $omit = ''; + $omit = undef; push @$prev_subj, @prev_pop; push @$subj, @curr_pop; } - $omit; + $omit // ''; } sub skel_dump { # walk_thread callback - my ($ctx, $level, $node) = @_; - my $smsg = $node->{smsg} or return _skel_ghost($ctx, $level, $node); + my ($ctx, $level, $smsg) = @_; + $smsg->{blob} or return _skel_ghost($ctx, $level, $smsg); my $skel = $ctx->{skel}; my $cur = $ctx->{cur}; @@ -989,7 +1010,7 @@ sub skel_dump { # walk_thread callback sub _skel_ghost { my ($ctx, $level, $node) = @_; - my $mid = $node->{id}; + my $mid = $node->{mid}; my $d = ' [not found] '; $d .= ' ' if exists $ctx->{searchview}; $d .= indent_for($level) . th_pfx($level); @@ -1012,18 +1033,23 @@ sub _skel_ghost { sub sort_ds { [ sort { - (eval { $a->topmost->{smsg}->{ds} } || 0) <=> - (eval { $b->topmost->{smsg}->{ds} } || 0) + (eval { $a->topmost->{ds} } || 0) <=> + (eval { $b->topmost->{ds} } || 0) } @{$_[0]} ]; } # accumulate recent topics if search is supported # returns 200 if done, 404 if not sub acc_topic { # walk_thread callback - my ($ctx, $level, $node) = @_; - my $mid = $node->{id}; - my $smsg = $node->{smsg} // $ctx->{-inbox}->smsg_by_mid($mid); - if ($smsg) { + my ($ctx, $level, $smsg) = @_; + my $mid = $smsg->{mid}; + my $has_blob = $smsg->{blob} // do { + if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) { + %$smsg = (%$smsg, %$by_mid); + 1; + } + }; + if ($has_blob) { my $subj = subject_normalized($smsg->{subject}); $subj = '(no subject)' if $subj eq ''; my $ds = $smsg->{ds}; @@ -1056,7 +1082,7 @@ sub acc_topic { # walk_thread callback sub dump_topics { my ($ctx) = @_; my $order = delete $ctx->{order}; # [ ds, subj1, subj2, subj3, ... ] - if (!@$order) { + unless ($order) { $ctx->{-html_tip} = '[No topics in range]'; return 404; } @@ -1099,6 +1125,7 @@ sub dump_topics { my @next_prev = @subj; # full copy my $omit = dedupe_subject($prev_subj, \@subj, ' "'); $prev_subj = \@next_prev; + $subj = join(' ', @subj); $subj = ascii_html($subj); obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx; $href = mid_href($mid); @@ -1131,8 +1158,9 @@ sub pagination_footer ($$) { "page: $next$prev"; } -sub index_nav { # callback for WwwStream - my (undef, $ctx) = @_; +sub index_nav { # callback for WwwStream::getline + my ($ctx) = @_; + return $ctx->html_top if exists $ctx->{-html_tip}; pagination_footer($ctx, '.') } @@ -1184,7 +1212,7 @@ sub index_topics { if (@$msgs) { walk_thread(thread_results($ctx, $msgs), $ctx, \&acc_topic); } - PublicInbox::WwwStream->response($ctx, dump_topics($ctx), \&index_nav); + PublicInbox::WwwStream::response($ctx, dump_topics($ctx), \&index_nav); } sub thread_adj_level { @@ -1214,7 +1242,7 @@ sub thread_adj_level { sub ghost_index_entry { my ($ctx, $level, $node) = @_; my ($beg, $end) = thread_adj_level($ctx, $level); - $beg . ''. ghost_parent($ctx->{-upfx}, $node->{id}) + $beg . ''. ghost_parent($ctx->{-upfx}, $node->{mid}) . '' . $end; }