sub eml_entry { - my ($ctx, $eml, $more) = @_; + my ($ctx, $eml) = @_; my $smsg = delete $ctx->{smsg}; my $subj = delete $smsg->{subject}; my $mid_raw = $smsg->{mid}; @@ -199,16 +207,15 @@ sub eml_entry { # Deleting these fields saves about 400K as we iterate across 1K msgs delete @$smsg{qw(ts blob)}; - my $hdr = $eml->header_obj; - my $from = _hdr_names_html($hdr, 'From'); + my $from = _hdr_names_html($eml, 'From'); obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx; $rv .= "From: $from @ ".fmt_ts($ds)." UTC"; my $upfx = $ctx->{-upfx}; my $mhref = $upfx . mid_href($mid_raw) . '/'; $rv .= qq{ (permalink / }; $rv .= qq{raw)\n}; - my $to = fold_addresses(_hdr_names_html($hdr, 'To')); - my $cc = fold_addresses(_hdr_names_html($hdr, 'Cc')); + my $to = fold_addresses(_hdr_names_html($eml, 'To')); + my $cc = fold_addresses(_hdr_names_html($eml, 'Cc')); my ($tlen, $clen) = (length($to), length($cc)); my $to_cc = ''; if (($tlen + $clen) > COLS) { @@ -227,7 +234,7 @@ sub eml_entry { $rv .= $to_cc; my $mapping = $ctx->{mapping}; - if (!$mapping && (defined($irt) || defined($irt = in_reply_to($hdr)))) { + if (!$mapping && (defined($irt) || defined($irt = in_reply_to($eml)))) { my $href = $upfx . mid_href($irt) . '/'; my $html = ascii_html($irt); $rv .= qq(In-Reply-To: <$html>\n) @@ -253,7 +260,6 @@ sub eml_entry { } elsif ($mapping) { my $nested = 'nested'; my $flat = 'flat'; - my $end = ''; if ($ctx->{flat}) { $hr = 1; $flat = "$flat"; @@ -267,15 +273,23 @@ sub eml_entry { $hr = $ctx->{-hr}; } - $rv .= $more ? '
' : '' if $hr; + # do we have more messages? start a new
if so + $rv .= scalar(@{$ctx->{msgs}}) ? '
' : '' if $hr; $rv; } sub pad_link ($$;$) { my ($mid, $level, $s) = @_; $s ||= '...'; - my $id = id_compress($mid, 1); - (' 'x19).indent_for($level).th_pfx($level)."($s)\n"; + my $href = defined($mid) ? + ("($s)\n") : + "($s)\n"; + (' 'x19).indent_for($level).th_pfx($level).$href; +} + +sub _skel_hdr { + # my ($mapping, $mid) = @_; + ($_[0]->{$_[1] // \'bogus'} // [ "(?)\n" ])->[0]; } sub _th_index_lite { @@ -307,10 +321,11 @@ sub _th_index_lite { my $s = ($idx - 1). ' preceding siblings ...'; $rv .= pad_link($pmid, $level, $s); } elsif ($idx == 2) { - my $ppmid = $siblings->[0]->{mid}; - $rv .= $pad . $mapping->{$ppmid}->[0]; + $rv .= $pad . _skel_hdr($mapping, + $siblings->[0] ? + $siblings->[0]->{mid} : undef); } - $rv .= $pad . $mapping->{$pmid}->[0]; + $rv .= $pad . _skel_hdr($mapping, $pmid); } } my $s_s = nr_to_s($nr_s, 'sibling', 'siblings'); @@ -320,26 +335,26 @@ sub _th_index_lite { $attr =~ s!]+>([^<]+)!$1!s; # no point linking to self $rv .= "@ $attr"; if ($nr_c) { - my $cmid = $children->[0]->{mid}; - $rv .= $pad . $mapping->{$cmid}->[0]; + my $cmid = $children->[0] ? $children->[0]->{mid} : undef; + $rv .= $pad . _skel_hdr($mapping, $cmid); if ($nr_c > 2) { my $s = ($nr_c - 1). ' more replies'; $rv .= pad_link($cmid, $level + 1, $s); } elsif (my $cn = $children->[1]) { - $rv .= $pad . $mapping->{$cn->{mid}}->[0]; + $rv .= $pad . _skel_hdr($mapping, $cn->{mid}); } } my $next = $siblings->[$idx+1] if $siblings && $idx >= 0; if ($next) { my $nmid = $next->{mid}; - $rv .= $pad . $mapping->{$nmid}->[0]; + $rv .= $pad . _skel_hdr($mapping, $nmid); my $nnext = $nr_s - $idx; if ($nnext > 2) { my $s = ($nnext - 1).' subsequent siblings'; $rv .= pad_link($nmid, $level, $s); } elsif (my $nn = $siblings->[$idx + 2]) { - $rv .= $pad . $mapping->{$nn->{mid}}->[0]; + $rv .= $pad . _skel_hdr($mapping, $nn->{mid}); } } $rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n"; @@ -368,7 +383,7 @@ sub pre_thread { # walk_thread callback sub thread_eml_entry { my ($ctx, $eml) = @_; my ($beg, $end) = thread_adj_level($ctx, $ctx->{level}); - $beg . '
' . eml_entry($ctx, $eml, 0) . '' . $end; + $beg . '
' . eml_entry($ctx, $eml) . '' . $end; } sub next_in_queue ($$) { @@ -384,7 +399,7 @@ sub next_in_queue ($$) { sub stream_thread_i { # PublicInbox::WwwStream::getline callback my ($ctx, $eml) = @_; - goto &thread_eml_entry if $eml; # tail recursion + return thread_eml_entry($ctx, $eml) if $eml; return unless exists($ctx->{skel}); my $ghost_ok = $ctx->{nr}++; while (1) { @@ -415,28 +430,39 @@ sub stream_thread ($$) { PublicInbox::WwwStream::aresponse($ctx, 200, \&stream_thread_i); } -# /$INBOX/$MESSAGE_ID/t/ +# /$INBOX/$MSGID/t/ and /$INBOX/$MSGID/T/ sub thread_html { my ($ctx) = @_; + $ctx->{-upfx} = '../../'; my $mid = $ctx->{mid}; - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my ($nr, $msgs) = $ibx->over->get_thread($mid); return missing_thread($ctx) if $nr == 0; + + # link $INBOX_DIR/description text to "index_topics" view around + # the newest message in this thread + my $t = ts2str($ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs)); + my $t_fmt = fmt_ts($ctx->{-t_max}); + my $skel = '
'; $skel .= $nr == 1 ? 'only message in thread' : 'end of thread'; - $skel .= ", back to index\n\n"; + $skel .= <~$t_fmt UTC | newest] + +EOF $skel .= "Thread overview: "; $skel .= $nr == 1 ? '(only message)' : "$nr+ messages"; $skel .= " (download: mbox.gz"; $skel .= " / follow: Atom feed)\n"; $skel .= "-- links below jump to the message on this page --\n"; - $ctx->{-upfx} = '../../'; $ctx->{cur_level} = 0; $ctx->{skel} = \$skel; $ctx->{prev_attr} = ''; $ctx->{prev_level} = 0; - $ctx->{root_anchor} = anchor_for($mid); - $ctx->{mapping} = {}; + $ctx->{root_anchor} = 'm' . id_compress($mid, 1); + $ctx->{mapping} = {}; # mid -> [ header_summary, node, idx, level ] $ctx->{s_nr} = ($nr > 1 ? "$nr+ messages" : 'only message') .' in thread'; @@ -463,7 +489,7 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback $ctx->{-title_html} = ascii_html($smsg->{subject}); $ctx->zmore($ctx->html_top); } - return eml_entry($ctx, $eml, scalar @{$ctx->{msgs}}); + return eml_entry($ctx, $eml); } else { while (my $smsg = shift @{$ctx->{msgs}}) { return $smsg if exists($smsg->{blob}); @@ -505,7 +531,8 @@ sub attach_link ($$$$;$) { return unless $part->{bdy}; my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...) - my $size = bytes::length($part->body); + my $size = length($part->body); + delete $part->{bdy}; # save memory # hide attributes normally, unless we want to aid users in # spotting MUA problems: @@ -541,7 +568,7 @@ EOF sub add_text_body { # callback for each_part my ($p, $ctx) = @_; my $upfx = $ctx->{mhref}; - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new; # $p - from each_part: [ Email::MIME-like, depth, $idx ] my ($part, $depth, $idx) = @$p; @@ -558,7 +585,7 @@ sub add_text_body { # callback for each_part # makes no difference to browsers, and don't screw up filename # link generation in diffs with the extra '%0D' - $s =~ s/\r\n/\n/sg; + $s =~ s/\r+\n/\n/sg; # will be escaped to `•' in HTML obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate}; @@ -578,8 +605,9 @@ sub add_text_body { # callback for each_part $diff = 1; delete $ctx->{-long_path}; my $spfx; - if ($ibx->{-repo_objs}) { - if (index($upfx, '//') >= 0) { # absolute URL (Atom feeds) + # absolute URL (Atom feeds) + if ($ibx->{coderepo}) { + if (index($upfx, '//') >= 0) { $spfx = $upfx; $spfx =~ s!/([^/]*)/\z!/!; } else { @@ -596,9 +624,6 @@ sub add_text_body { # callback for each_part $ctx->{-spfx} = $spfx; }; - # some editors don't put trailing newlines at the end: - $s .= "\n" unless $s =~ /\n\z/s; - # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); undef $s; # free memory @@ -607,6 +632,7 @@ sub add_text_body { # callback for each_part attach_link($ctx, $ct, $p, $fn, $err); $$rv .= "\n"; } + delete $part->{bdy}; # save memory foreach my $cur (@sections) { if ($cur =~ /\A>/) { # we use a here to allow users to specify @@ -625,25 +651,28 @@ sub add_text_body { # callback for each_part } sub _msg_page_prepare_obuf { - my ($hdr, $ctx) = @_; - my $over = $ctx->{-inbox}->over; + my ($eml, $ctx) = @_; + my $over = $ctx->{ibx}->over; my $obfs_ibx = $ctx->{-obfs_ibx}; my $rv = ''; - my $mids = mids_for_index($hdr); + my $mids = mids_for_index($eml); my $nr = $ctx->{nr}++; if ($nr) { # unlikely + if ($ctx->{chash} eq content_hash($eml)) { + warn "W: BUG? @$mids not deduplicated properly\n"; + return \$rv; + } + $rv .= +" WARNING: multiple messages have this Message-ID\n"; $rv .= ''; } else { - $ctx->{first_hdr} = $hdr; - if ($ctx->{smsg}) { - $rv .= -"WARNING: multiple messages have this Message-ID\n"; - } + $ctx->{first_hdr} = $eml->header_obj; + $ctx->{chash} = content_hash($eml) if $ctx->{smsg}; # reused MID $rv .= ""; # anchor for body start } $ctx->{-upfx} = '../' if $over; my @title; # (Subject[0], From[0]) - for my $v ($hdr->header('From')) { + for my $v ($eml->header('From')) { my @n = PublicInbox::Address::names($v); $v = ascii_html($v); $title[1] //= ascii_html(join(', ', @n)); @@ -654,14 +683,14 @@ sub _msg_page_prepare_obuf { $rv .= "From: $v\n" if $v ne ''; } foreach my $h (qw(To Cc)) { - for my $v ($hdr->header($h)) { + for my $v ($eml->header($h)) { fold_addresses($v); $v = ascii_html($v); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; $rv .= "$h: $v\n" if $v ne ''; } } - my @subj = $hdr->header('Subject'); + my @subj = $eml->header('Subject'); if (@subj) { my $v = ascii_html(shift @subj); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; @@ -677,10 +706,10 @@ sub _msg_page_prepare_obuf { $rv .= qq() if $over; $title[0] = '(no subject)'; } - for my $v ($hdr->header('Date')) { + for my $v ($eml->header('Date')) { $v = ascii_html($v); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P - $rv .= "Date: $v\n"; + $rv .= qq{Date: $v\t[thread overview]\n}; } if (!$nr) { # first (and only) message, common case $ctx->{-title_html} = join(' - ', @title); @@ -696,12 +725,12 @@ sub _msg_page_prepare_obuf { my $lnk = PublicInbox::Linkify->new; my $s = ''; for my $h (qw(Message-ID X-Alt-Message-ID)) { - $s .= "$h: $_\n" for ($hdr->header_raw($h)); + $s .= "$h: $_\n" for ($eml->header_raw($h)); } $lnk->linkify_mids('..', \$s, 1); $rv .= $s; } - $rv .= _parent_headers($hdr, $over); + $rv .= _parent_headers($eml, $over); $rv .= "\n"; \$rv; } @@ -716,7 +745,7 @@ sub SKEL_EXPAND () { sub thread_skel ($$$) { my ($skel, $ctx, $hdr) = @_; my $mid = mids($hdr)->[0]; - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my ($nr, $msgs) = $ibx->over->get_thread($mid); my $parent = in_reply_to($hdr); $$skel .= "\nThread overview: "; @@ -725,7 +754,8 @@ sub thread_skel ($$$) { $$skel .= SKEL_EXPAND."\n "; $$skel .= ghost_parent('../', $parent) . "\n"; } else { - $$skel .= '[no followups] '.SKEL_EXPAND."\n"; + $$skel .= "[no followups] ". + SKEL_EXPAND."\n"; } $ctx->{next_msg} = undef; $ctx->{parent_msg} = $parent; @@ -784,15 +814,22 @@ sub _parent_headers { $rv; } -# returns a string buffer via ->getline +# returns a string buffer sub html_footer { my ($ctx, $hdr) = @_; - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my $upfx = '../'; - my $skel = " index"; + my $skel; my $rv = ''; if ($ibx->over) { - $skel .= "\n"; + my $t = ts2str($ctx->{-t_max}); + my $t_fmt = fmt_ts($ctx->{-t_max}); + $skel .= <~$t_fmt UTC|newest] +EOF + thread_skel(\$skel, $ctx, $hdr); my ($next, $prev); my $parent = ' '; @@ -823,6 +860,8 @@ sub html_footer { $parent = " parent"; } $rv .= "$next $prev$parent "; + } else { # unindexed inboxes w/o over + $skel = qq( latest); } $rv .= qq(reply); $rv .= $skel; @@ -837,11 +876,6 @@ sub linkify_ref_no_over { "<$html>"; } -sub anchor_for { - my ($msgid) = @_; - 'm' . id_compress($msgid, 1); -} - sub ghost_parent { my ($upfx, $mid) = @_; @@ -860,7 +894,7 @@ sub find_mid_root { ++$ctx->{root_idx} if $level == 0; if ($node->{mid} eq $ctx->{mid}) { $ctx->{found_mid_at} = $ctx->{root_idx}; - return 0; + return 0; # stop iterating } 1; } @@ -943,7 +977,7 @@ sub skel_dump { # walk_thread callback $$skel .= delete($ctx->{sl_note}) || ''; } - my $f = ascii_html($smsg->{from_name}); + my $f = ascii_html(delete $smsg->{from_name}); my $obfs_ibx = $ctx->{-obfs_ibx}; obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx; @@ -1038,10 +1072,10 @@ sub _skel_ghost { } sub sort_ds { - [ sort { + @{$_[0]} = sort { (eval { $a->topmost->{ds} } || 0) <=> (eval { $b->topmost->{ds} } || 0) - } @{$_[0]} ]; + } @{$_[0]}; } # accumulate recent topics if search is supported @@ -1050,7 +1084,7 @@ sub acc_topic { # walk_thread callback my ($ctx, $level, $smsg) = @_; my $mid = $smsg->{mid}; my $has_blob = $smsg->{blob} // do { - if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) { + if (my $by_mid = $ctx->{ibx}->smsg_by_mid($mid)) { %$smsg = (%$smsg, %$by_mid); 1; } @@ -1094,7 +1128,7 @@ sub dump_topics { } my @out; - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; # sort by recency, this allows new posts to "bump" old topics... @@ -1119,10 +1153,8 @@ sub dump_topics { $anchor = '#t'; # thread skeleton } - my $mbox = qq(mbox.gz); - my $atom = qq(Atom); my $s = "$top_subj\n" . - " $ds UTC $n - $mbox / $atom\n"; + " $ds UTC $n\n"; for (my $i = 0; $i < scalar(@extra); $i += 2) { my $level = $extra[$i]; my $subj = $extra[$i + 1]; # already normalized @@ -1144,22 +1176,18 @@ sub dump_topics { 200; } -# only for the t= query parameter passed to overview DB -sub ts2str ($) { strftime('%Y%m%d%H%M%S', gmtime($_[0])) }; - sub str2ts ($) { my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $_[0]); - timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy); + timegm($ss || 0, $mm || 0, $hh || 0, $dd, $mon - 1, $yyyy); } sub pagination_footer ($$) { my ($ctx, $latest) = @_; - delete $ctx->{qp} or return; my $next = $ctx->{next_page} || ''; my $prev = $ctx->{prev_page} || ''; - if ($prev) { - $next = $next ? "$next " : ' '; - $prev .= qq! latest!; + if ($prev) { # aligned padding for: 'next (older) | ' + $next = $next ? "$next | " : ' | '; + $prev .= qq[ | latest]; } " page: $next$prev"; } @@ -1176,7 +1204,7 @@ sub paginate_recent ($$) { $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1); $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1); - my $ibx = $ctx->{-inbox}; + my $ibx = $ctx->{ibx}; my $msgs = $ibx->recent($opts, $after, $before); my $nr = scalar @$msgs; if ($nr < $lim && defined($after)) { @@ -1197,15 +1225,18 @@ sub paginate_recent ($$) { } if (defined($oldest) && $more) { my $s = ts2str($oldest); - $ctx->{next_page} = qq!next!; + $ctx->{next_page} = qq[] . + 'next (older)'; } if (defined($newest) && (defined($before) || defined($after))) { my $s = ts2str($newest); - $ctx->{prev_page} = qq!prev!; + $ctx->{prev_page} = qq[] . + 'prev (newer)'; } $msgs; } +# GET /$INBOX - top-level inbox view for indexed inboxes sub index_topics { my ($ctx) = @_; my $msgs = paginate_recent($ctx, 200); # 200 is our window @@ -1243,7 +1274,7 @@ sub thread_adj_level { sub ghost_index_entry { my ($ctx, $level, $node) = @_; my ($beg, $end) = thread_adj_level($ctx, $level); - $beg . ''. ghost_parent($ctx->{-upfx}, $node->{mid}) + $beg . ''. ghost_parent($ctx->{-upfx}, $node->{mid} // '?') . '' . $end; }