X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FView.pm;h=1aa014fdd2397c6f508c4316ce83c3938d7e5bb0;hb=2394cb0bdc671605729b5a4c578ef4cd3b9813fd;hp=cad90a7934e1632c2a5a42b95b22c9698831b024;hpb=87dca6d8d5988c5eb54019cca342450b0b7dd6b7;p=public-inbox.git diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index cad90a79..1aa014fd 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2018 all contributors +# Copyright (C) 2014-2019 all contributors # License: AGPL-3.0+ # # Used for displaying the HTML web interface. @@ -6,6 +6,7 @@ package PublicInbox::View; use strict; use warnings; +use bytes (); # only for bytes::length use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::Hval qw/ascii_html obfuscate_addrs/; use PublicInbox::Linkify; @@ -14,8 +15,11 @@ use PublicInbox::MsgIter; use PublicInbox::Address; use PublicInbox::WwwStream; use PublicInbox::Reply; +use PublicInbox::ViewDiff qw(flush_diff); require POSIX; - +use Time::Local qw(timegm); +use PublicInbox::SearchMsg qw(subject_normalized); +use constant COLS => 72; use constant INDENT => ' '; use constant TCHILD => '` '; sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD }; @@ -26,7 +30,7 @@ sub msg_html { my ($ctx, $mime, $more, $smsg) = @_; my $hdr = $mime->header_obj; my $ibx = $ctx->{-inbox}; - my $obfs_ibx = $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; + $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; my $tip = _msg_html_prepare($hdr, $ctx, $more, 0); my $end = 2; PublicInbox::WwwStream->response($ctx, 200, sub { @@ -34,7 +38,7 @@ sub msg_html { if ($nr == 1) { # $more cannot be true w/o $smsg being defined: my $upfx = $more ? '../'.mid_escape($smsg->mid).'/' : ''; - $tip . multipart_text_as_html($mime, $upfx, $obfs_ibx) . + $tip . multipart_text_as_html($mime, $upfx, $ctx) . '
' } elsif ($more && @$more) { ++$end; @@ -57,20 +61,16 @@ sub msg_page { my ($ctx) = @_; my $mid = $ctx->{mid}; my $ibx = $ctx->{-inbox}; - my ($first, $more, $head, $tail, $db); + my ($first, $more); my $smsg; - if (my $srch = $ibx->search) { - $srch->retry_reopen(sub { - ($head, $tail, $db) = $srch->each_smsg_by_mid($mid); - for (; !defined($first) && $head != $tail; $head++) { - my @args = ($head, $db, $mid); - $smsg = PublicInbox::SearchMsg->get(@args); - $first = $ibx->msg_by_smsg($smsg); - } - if ($head != $tail) { - $more = [ $head, $tail, $db ]; - } - }); + if (my $over = $ibx->over) { + my ($id, $prev); + $smsg = $over->next_by_mid($mid, \$id, \$prev); + $first = $ibx->msg_by_smsg($smsg) if $smsg; + if ($first) { + my $next = $over->next_by_mid($mid, \$id, \$prev); + $more = [ $id, $prev, $next ] if $next; + } return unless $first; } else { $first = $ibx->msg_by_mid($mid) or return; @@ -81,24 +81,17 @@ sub msg_page { sub msg_html_more { my ($ctx, $more, $nr) = @_; my $str = eval { - my $smsg; - my ($head, $tail, $db) = @$more; + my ($id, $prev, $smsg) = @$more; my $mid = $ctx->{mid}; - for (; !defined($smsg) && $head != $tail; $head++) { - my $m = PublicInbox::SearchMsg->get($head, $db, $mid); - $smsg = $ctx->{-inbox}->smsg_mime($m); - } - if ($head == $tail) { # done - @$more = (); - } else { - $more->[0] = $head; - } + my $ibx = $ctx->{-inbox}; + $smsg = $ibx->smsg_mime($smsg); + my $next = $ibx->over->next_by_mid($mid, \$id, \$prev); + @$more = $next ? ($id, $prev, $next) : (); if ($smsg) { my $mime = $smsg->{mime}; my $upfx = '../' . mid_escape($smsg->mid) . '/'; _msg_html_prepare($mime->header_obj, $ctx, $more, $nr) . - multipart_text_as_html($mime, $upfx, - $ctx->{-obfs_ibx}) . + multipart_text_as_html($mime, $upfx, $ctx) . '
' } else { ''; @@ -128,6 +121,9 @@ sub msg_reply { my ($arg, $link, $reply_to_all) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); + if (ref($arg) eq 'SCALAR') { + return '
'.ascii_html($$arg).'
'; + } # mailto: link only works if address obfuscation is disabled if ($link) { @@ -174,10 +170,28 @@ sub in_reply_to { $refs->[-1]; } +sub fold_addresses ($) { + return $_[0] if length($_[0]) <= COLS; + # try to fold on commas after non-word chars before $lim chars, + # Try to get the "," preceeded by ">" or ")", but avoid folding + # on the comma where somebody uses "Lastname, Firstname". + # We also try to keep the last and penultimate addresses in + # the list on the same line if possible, hence the extra \z + # Fall back to folding on spaces at $lim + 1 chars + my $lim = COLS - 8; # 8 = "\t" display width + my $too_long = $lim + 1; + $_[0] =~ s/\s*\z//s; # Email::Simple doesn't strip trailing spaces + $_[0] = join("\n\t", + ($_[0] =~ /(.{0,$lim}\W(?:,|\z)| + .{1,$lim}(?:,|\z)| + .{1,$lim}| + .{$too_long,}?)(?:\s|\z)/xgo)); +} + sub _hdr_names_html ($$) { my ($hdr, $field) = @_; - my $val = $hdr->header($field) or return ''; - ascii_html(join(', ', PublicInbox::Address::names($val))); + my @vals = $hdr->header($field) or return ''; + ascii_html(join(', ', PublicInbox::Address::names(join(',', @vals)))); } sub nr_to_s ($$$) { @@ -189,7 +203,6 @@ sub nr_to_s ($$$) { # this is already inside a
 sub index_entry {
 	my ($smsg, $ctx, $more) = @_;
-	my $srch = $ctx->{srch};
 	my $subj = $smsg->subject;
 	my $mid_raw = $smsg->mid;
 	my $id = id_compress($mid_raw, 1);
@@ -199,6 +212,7 @@ sub index_entry {
 	my $irt;
 	my $obfs_ibx = $ctx->{-obfs_ibx};
 
+	$subj = '(no subject)' if $subj eq '';
 	my $rv = "* ";
 	$subj = ''.ascii_html($subj).'';
 	obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
@@ -206,23 +220,37 @@ sub index_entry {
 	$rv .= $subj . "\n";
 	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
 	my @tocc;
-	my $mime = $smsg->{mime};
+	my $ds = $smsg->ds; # for v1 non-Xapian/SQLite users
+	# deleting {mime} is critical to memory use,
+	# the rest of the fields saves about 400K as we iterate across 1K msgs
+	my ($mime) = delete @$smsg{qw(mime ds ts blob subject)};
+
 	my $hdr = $mime->header_obj;
-	foreach my $f (qw(To Cc)) {
-		my $dst = _hdr_names_html($hdr, $f);
-		if ($dst ne '') {
-			obfuscate_addrs($obfs_ibx, $dst) if $obfs_ibx;
-			push @tocc, "$f: $dst";
-		}
-	}
 	my $from = _hdr_names_html($hdr, 'From');
 	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
-	$rv .= "From: $from @ ".fmt_ts($smsg->ds)." UTC";
+	$rv .= "From: $from @ ".fmt_ts($ds)." UTC";
 	my $upfx = $ctx->{-upfx};
 	my $mhref = $upfx . mid_escape($mid_raw) . '/';
 	$rv .= qq{ (permalink / };
 	$rv .= qq{raw)\n};
-	$rv .= '  '.join('; +', @tocc) . "\n" if @tocc;
+	my $to = fold_addresses(_hdr_names_html($hdr, 'To'));
+	my $cc = fold_addresses(_hdr_names_html($hdr, 'Cc'));
+	my ($tlen, $clen) = (length($to), length($cc));
+	my $to_cc = '';
+	if (($tlen + $clen) > COLS) {
+		$to_cc .= '  To: '.$to."\n" if $tlen;
+		$to_cc .= '  Cc: '.$cc."\n" if $clen;
+	} else {
+		if ($tlen) {
+			$to_cc .= '  To: '.$to;
+			$to_cc .= '; +Cc: '.$cc if $clen;
+		} else {
+			$to_cc .= '  Cc: '.$cc if $clen;
+		}
+		$to_cc .= "\n";
+	}
+	obfuscate_addrs($obfs_ibx, $to_cc) if $obfs_ibx;
+	$rv .= $to_cc;
 
 	my $mapping = $ctx->{mapping};
 	if (!$mapping && (defined($irt) || defined($irt = in_reply_to($hdr)))) {
@@ -234,7 +262,8 @@ sub index_entry {
 	$rv .= "\n";
 
 	# scan through all parts, looking for displayable text
-	msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs_ibx, $_[0]) });
+	my $ibx = $ctx->{-inbox};
+	msg_iter($mime, sub { $rv .= add_text_body($mhref, $ctx, $_[0]) });
 
 	# add the footer
 	$rv .= "\n^ ".
@@ -288,7 +317,10 @@ sub _th_index_lite {
 	my $nr_s = 0;
 	my $siblings;
 	if (my $smsg = $node->{smsg}) {
-		($$irt) = (($smsg->{references} || '') =~ m/<([^>]+)>\z/);
+		# delete saves about 200KB on a 1K message thread
+		if (my $refs = delete $smsg->{references}) {
+			($$irt) = ($refs =~ m/<([^>]+)>\z/);
+		}
 	}
 	my $irt_map = $mapping->{$$irt} if defined $$irt;
 	if (defined $irt_map) {
@@ -346,7 +378,7 @@ sub walk_thread {
 	while (@q) {
 		my ($level, $node, $i) = splice(@q, 0, 3);
 		defined $node or next;
-		$cb->($ctx, $level, $node, $i);
+		$cb->($ctx, $level, $node, $i) or return;
 		++$level;
 		$i = 0;
 		unshift @q, map { ($level, $_, $i++) } @{$node->{children}};
@@ -367,7 +399,7 @@ sub thread_index_entry {
 
 sub stream_thread ($$) {
 	my ($rootset, $ctx) = @_;
-	my $inbox = $ctx->{-inbox};
+	my $ibx = $ctx->{-inbox};
 	my @q = map { (0, $_) } @$rootset;
 	my $level;
 	my $smsg;
@@ -376,11 +408,11 @@ sub stream_thread ($$) {
 		my $node = shift @q or next;
 		my $cl = $level + 1;
 		unshift @q, map { ($cl, $_) } @{$node->{children}};
-		$smsg = $inbox->smsg_mime($node->{smsg}) and last;
+		$smsg = $ibx->smsg_mime($node->{smsg}) and last;
 	}
 	return missing_thread($ctx) unless $smsg;
 
-	$ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
+	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	$ctx->{-title_html} = ascii_html($smsg->subject);
 	$ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg);
 	$smsg = undef;
@@ -391,7 +423,7 @@ sub stream_thread ($$) {
 			my $node = shift @q or next;
 			my $cl = $level + 1;
 			unshift @q, map { ($cl, $_) } @{$node->{children}};
-			if ($smsg = $inbox->smsg_mime($node->{smsg})) {
+			if ($smsg = $ibx->smsg_mime($node->{smsg})) {
 				return thread_index_entry($ctx, $level, $smsg);
 			} else {
 				return ghost_index_entry($ctx, $level, $node);
@@ -407,8 +439,8 @@ sub stream_thread ($$) {
 sub thread_html {
 	my ($ctx) = @_;
 	my $mid = $ctx->{mid};
-	my $srch = $ctx->{srch};
-	my ($nr, $msgs) = $srch->get_thread($mid);
+	my $ibx = $ctx->{-inbox};
+	my ($nr, $msgs) = $ibx->over->get_thread($mid);
 	return missing_thread($ctx) if $nr == 0;
 	my $skel = '
';
 	$skel .= $nr == 1 ? 'only message in thread' : 'end of thread';
@@ -425,13 +457,13 @@ sub thread_html {
 	$ctx->{prev_level} = 0;
 	$ctx->{root_anchor} = anchor_for($mid);
 	$ctx->{mapping} = {};
-	$ctx->{s_nr} = "$nr+ messages in thread";
+	$ctx->{s_nr} = ($nr > 1 ? "$nr+ messages" : 'only message')
+	               .' in thread';
 
 	my $rootset = thread_results($ctx, $msgs);
 
 	# reduce hash lookups in pre_thread->skel_dump
-	my $inbox = $ctx->{-inbox};
-	$ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
+	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	walk_thread($rootset, $ctx, *pre_thread);
 
 	$skel .= '
'; @@ -440,7 +472,7 @@ sub thread_html { # flat display: lazy load the full message from smsg my $smsg; while (my $m = shift @$msgs) { - $smsg = $inbox->smsg_mime($m) and last; + $smsg = $ibx->smsg_mime($m) and last; } return missing_thread($ctx) unless $smsg; $ctx->{-title_html} = ascii_html($smsg->subject); @@ -450,7 +482,7 @@ sub thread_html { return unless $msgs; $smsg = undef; while (my $m = shift @$msgs) { - $smsg = $inbox->smsg_mime($m) and last; + $smsg = $ibx->smsg_mime($m) and last; } return index_entry($smsg, $ctx, scalar @$msgs) if $smsg; $msgs = undef; @@ -459,11 +491,11 @@ sub thread_html { } sub multipart_text_as_html { - my ($mime, $upfx, $obfs_ibx) = @_; + my ($mime, $upfx, $ctx) = @_; my $rv = ""; # scan through all parts, looking for displayable text - msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs_ibx, $_[0]) }); + msg_iter($mime, sub { $rv .= add_text_body($upfx, $ctx, $_[0]) }); $rv; } @@ -472,12 +504,12 @@ sub flush_quote { # show everything in the full version with anchor from # short version (see above) - my $rv = $l->linkify_1(join('', @$quot)); - @$quot = (); + my $rv = $l->linkify_1($$quot); # we use a here to allow users to specify their own # color for quoted text $rv = $l->linkify_2(ascii_html($rv)); + $$quot = undef; $$s .= qq() . $rv . '' } @@ -496,7 +528,7 @@ sub attach_link ($$$$;$) { $desc = $fn unless defined $desc; $desc = '' unless defined $desc; my $sfn; - if (defined $fn && $fn =~ /\A[[:alnum:]][\w\.-]+[[:alnum:]]\z/) { + if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) { $sfn = $fn; } elsif ($ct eq 'text/plain') { $sfn = 'a.txt'; @@ -516,74 +548,85 @@ sub attach_link ($$$$;$) { } sub add_text_body { - my ($upfx, $obfs_ibx, $p) = @_; + my ($upfx, $ctx, $p) = @_; + my $ibx = $ctx->{-inbox}; + my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; # $p - from msg_iter: [ Email::MIME, depth, @idx ] - my ($part, $depth) = @$p; # attachment @idx is unused + my ($part, $depth, @idx) = @$p; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; - - if ($ct =~ m!\btext/x?html\b!i) { - return attach_link($upfx, $ct, $p, $fn); - } - - my $s = eval { $part->body_str }; - - # badly-encoded message? tell the world about it! - my $err = $@; - if ($err) { - if ($ct =~ m!\btext/plain\b!i) { - # Try to assume UTF-8 because Alpine seems to - # do wacky things and set charset=X-UNKNOWN - $part->charset_set('UTF-8'); - $s = eval { $part->body_str }; - - # If forcing charset=UTF-8 failed, - # attach_link will warn further down... - $s = $part->body if $@; - } else { - return attach_link($upfx, $ct, $p, $fn); + my ($s, $err) = msg_part_text($part, $ct); + + return attach_link($upfx, $ct, $p, $fn) unless defined $s; + + # makes no difference to browsers, and don't screw up filename + # link generation in diffs with the extra '%0D' + $s =~ s/\r\n/\n/sg; + + # always support diff-highlighting, but we can't linkify hunk + # headers for solver unless some coderepo are configured: + my $diff; + if ($s =~ /^(?:diff|---|\+{3}) /ms) { + # diffstat anchors do not link across attachments or messages: + $idx[0] = $upfx . $idx[0] if $upfx ne ''; + $ctx->{-apfx} = join('/', @idx); + $ctx->{-anchors} = {}; # attr => filename + $ctx->{-diff} = $diff = []; + delete $ctx->{-long_path}; + my $spfx; + if ($ibx->{-repo_objs}) { + if (index($upfx, '//') >= 0) { # absolute URL (Atom feeds) + $spfx = $upfx; + $spfx =~ s!/([^/]*)/\z!/!; + } else { + my $n_slash = $upfx =~ tr!/!/!; + if ($n_slash == 0) { + $spfx = '../'; + } elsif ($n_slash == 1) { + $spfx = ''; + } else { # nslash == 2 + $spfx = '../../'; + } + } } - } + $ctx->{-spfx} = $spfx; + }; + + # some editors don't put trailing newlines at the end: + $s .= "\n" unless $s =~ /\n\z/s; - my @lines = split(/^/m, $s); + # split off quoted and unquoted blocks: + my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); $s = ''; if (defined($fn) || $depth > 0 || $err) { + # badly-encoded message with $err? tell the world about it! $s .= attach_link($upfx, $ct, $p, $fn, $err); $s .= "\n"; } - my @quot; my $l = PublicInbox::Linkify->new; - foreach my $cur (@lines) { - if ($cur !~ /^>/) { - # show the previously buffered quote inline - flush_quote(\$s, $l, \@quot) if @quot; - - # regular line, OK + foreach my $cur (@sections) { + if ($cur =~ /\A>/) { + flush_quote(\$s, $l, \$cur); + } elsif ($diff) { + @$diff = split(/^/m, $cur); + $cur = undef; + flush_diff(\$s, $ctx, $l); + } else { + # regular lines, OK $l->linkify_1($cur); $s .= $l->linkify_2(ascii_html($cur)); - } else { - push @quot, $cur; + $cur = undef; } } - if (@quot) { # ugh, top posted - flush_quote(\$s, $l, \@quot); - obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx; - $s; - } else { - obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx; - if ($s =~ /\n\z/s) { # common, last line ends with a newline - $s; - } else { # some editors don't do newlines... - $s .= "\n"; - } - } + obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx; + $s; } sub _msg_html_prepare { my ($hdr, $ctx, $more, $nr) = @_; - my $srch = $ctx->{srch} if $ctx; my $atom = ''; + my $over = $ctx->{-inbox}->over; my $obfs_ibx = $ctx->{-obfs_ibx}; my $rv = ''; my $mids = mids($hdr); @@ -597,33 +640,51 @@ sub _msg_html_prepare { } else { $rv .= '
';
 	}
-	if ($srch) {
+	if ($over) {
 		$ctx->{-upfx} = '../';
 	}
-	my @title;
-	foreach my $h (qw(From To Cc Subject Date)) {
-		my $v = $hdr->header($h);
-		defined($v) && ($v ne '') or next;
+	my @title; # (Subject[0], From[0])
+	for my $v ($hdr->header('From')) {
 		$v = PublicInbox::Hval->new($v);
-
-		if ($h eq 'From') {
-			my @n = PublicInbox::Address::names($v->raw);
-			$title[1] = ascii_html(join(', ', @n));
-			obfuscate_addrs($obfs_ibx, $title[1]) if $obfs_ibx;
-		} elsif ($h eq 'Subject') {
-			$title[0] = $v->as_html;
-			if ($srch) {
-				$rv .= qq($h: );
-				$rv .= $v->as_html . "\n";
-				next;
+		my @n = PublicInbox::Address::names($v->raw);
+		$title[1] //= ascii_html(join(', ', @n));
+		$v = $v->as_html;
+		if ($obfs_ibx) {
+			obfuscate_addrs($obfs_ibx, $v);
+			obfuscate_addrs($obfs_ibx, $title[1]);
+		}
+		$rv .= "From: $v\n" if $v ne '';
+	}
+	foreach my $h (qw(To Cc)) {
+		for my $v ($hdr->header($h)) {
+			fold_addresses($v);
+			$v = ascii_html($v);
+			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+			$rv .= "$h: $v\n" if $v ne '';
+		}
+	}
+	my @subj = $hdr->header('Subject');
+	if (@subj) {
+		for my $v (@subj) {
+			$v = ascii_html($v);
+			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+			$rv .= 'Subject: ';
+			if ($over) {
+				$rv .= qq($v\n);
+			} else {
+				$rv .= "$v\n";
 			}
+			$title[0] //= $v;
 		}
-		$v = $v->as_html;
-		obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
-		$rv .= "$h: $v\n";
-
+	} else { # dummy anchor for thread skeleton at bottom of page
+		$rv .= qq() if $over;
+		$title[0] = '(no subject)';
+	}
+	for my $v ($hdr->header('Date')) {
+		$v = ascii_html($v);
+		obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P
+		$rv .= "Date: $v\n";
 	}
-	$title[0] ||= '(no subject)';
 	$ctx->{-title_html} = join(' - ', @title);
 	foreach (@$mids) {
 		my $mid = PublicInbox::Hval->new_msgid($_) ;
@@ -639,15 +700,15 @@ sub _msg_html_prepare {
 			$rv .= "(raw)\n";
 		}
 	}
-	$rv .= _parent_headers($hdr, $srch);
+	$rv .= _parent_headers($hdr, $over);
 	$rv .= "\n";
 }
 
 sub thread_skel {
 	my ($dst, $ctx, $hdr, $tpfx) = @_;
-	my $srch = $ctx->{srch};
 	my $mid = mids($hdr)->[0];
-	my ($nr, $msgs) = $srch->get_thread($mid);
+	my $ibx = $ctx->{-inbox};
+	my ($nr, $msgs) = $ibx->over->get_thread($mid);
 	my $expand = qq(expand[flat) .
 	                qq(|nested]  ) .
 			qq(mbox.gz  ) .
@@ -670,16 +731,17 @@ sub thread_skel {
 	$$dst .= "$nr+ messages / $expand";
 	$$dst .= qq!  top\n!;
 
-	my $subj = $hdr->header('Subject');
-	defined $subj or $subj = '';
-	$ctx->{prev_subj} = [ split(/ /, $srch->subject_normalized($subj)) ];
+	# nb: mutt only shows the first Subject in the index pane
+	# when multiple Subject: headers are present, so we follow suit:
+	my $subj = $hdr->header('Subject') // '';
+	$subj = '(no subject)' if $subj eq '';
+	$ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ];
 	$ctx->{cur} = $mid;
 	$ctx->{prev_attr} = '';
 	$ctx->{prev_level} = 0;
 	$ctx->{dst} = $dst;
 
 	# reduce hash lookups in skel_dump
-	my $ibx = $ctx->{-inbox};
 	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
 	walk_thread(thread_results($ctx, $msgs), $ctx, *skel_dump);
 
@@ -687,38 +749,34 @@ sub thread_skel {
 }
 
 sub _parent_headers {
-	my ($hdr, $srch) = @_;
+	my ($hdr, $over) = @_;
 	my $rv = '';
-
-	my $irt = in_reply_to($hdr);
-	if (defined $irt) {
-		my $v = PublicInbox::Hval->new_msgid($irt);
-		my $html = $v->as_html;
-		my $href = $v->{href};
-		$rv .= "In-Reply-To: <";
-		$rv .= "$html>\n";
+	my @irt = $hdr->header_raw('In-Reply-To');
+	my $refs;
+	if (@irt) {
+		my $lnk = PublicInbox::Linkify->new;
+		$rv .= "In-Reply-To: $_\n" for @irt;
+		$lnk->linkify_mids('..', \$rv);
+	} else {
+		$refs = references($hdr);
+		my $irt = pop @$refs;
+		if (defined $irt) {
+			my $v = PublicInbox::Hval->new_msgid($irt);
+			my $html = $v->as_html;
+			my $href = $v->{href};
+			$rv .= "In-Reply-To: <";
+			$rv .= "$html>\n";
+		}
 	}
 
 	# do not display References: if search is present,
 	# we show the thread skeleton at the bottom, instead.
-	return $rv if $srch;
-
-	my $refs = $hdr->header_raw('References');
-	if ($refs) {
-		# avoid redundant URLs wasting bandwidth
-		my %seen;
-		$seen{$irt} = 1 if defined $irt;
-		my @refs;
-		my @raw_refs = ($refs =~ /<([^>]+)>/g);
-		foreach my $ref (@raw_refs) {
-			next if $seen{$ref};
-			$seen{$ref} = 1;
-			push @refs, linkify_ref_nosrch($ref);
-		}
+	return $rv if $over;
 
-		if (@refs) {
-			$rv .= 'References: '. join("\n\t", @refs) . "\n";
-		}
+	$refs //= references($hdr);
+	if (@$refs) {
+		@$refs = map { linkify_ref_no_over($_) } @$refs;
+		$rv .= 'References: '. join("\n\t", @$refs) . "\n";
 	}
 	$rv;
 }
@@ -726,12 +784,12 @@ sub _parent_headers {
 sub html_footer {
 	my ($hdr, $standalone, $ctx, $rhref) = @_;
 
-	my $srch = $ctx->{srch} if $ctx;
+	my $ibx = $ctx->{-inbox} if $ctx;
 	my $upfx = '../';
 	my $tpfx = '';
 	my $idx = $standalone ? " index" : '';
 	my $irt = '';
-	if ($idx && $srch) {
+	if ($idx && $ibx->over) {
 		$idx .= "\n";
 		thread_skel(\$idx, $ctx, $hdr, $tpfx);
 		my ($next, $prev);
@@ -771,7 +829,7 @@ sub html_footer {
 	$irt .= $idx;
 }
 
-sub linkify_ref_nosrch {
+sub linkify_ref_no_over {
 	my $v = PublicInbox::Hval->new_msgid($_[0]);
 	my $html = $v->as_html;
 	my $href = $v->{href};
@@ -797,10 +855,56 @@ sub indent_for {
 	$level ? INDENT x ($level - 1) : '';
 }
 
+sub find_mid_root {
+	my ($ctx, $level, $node, $idx) = @_;
+	++$ctx->{root_idx} if $level == 0;
+	if ($node->{id} eq $ctx->{mid}) {
+		$ctx->{found_mid_at} = $ctx->{root_idx};
+		return 0;
+	}
+	1;
+}
+
+sub strict_loose_note ($) {
+	my ($nr) = @_;
+	my $msg =
+"  -- strict thread matches above, loose matches on Subject: below --\n";
+
+	if ($nr > PublicInbox::Over::DEFAULT_LIMIT()) {
+		$msg .=
+"  -- use mbox.gz link to download all $nr messages --\n";
+	}
+	$msg;
+}
+
 sub thread_results {
 	my ($ctx, $msgs) = @_;
 	require PublicInbox::SearchThread;
-	PublicInbox::SearchThread::thread($msgs, *sort_ds, $ctx->{-inbox});
+	my $ibx = $ctx->{-inbox};
+	my $rootset = PublicInbox::SearchThread::thread($msgs, *sort_ds, $ibx);
+
+	# FIXME: `tid' is broken on --reindex, so that needs to be fixed
+	# and preserved in the future.  This bug is hidden by `sid' matches
+	# in get_thread, so we never noticed it until now.  And even when
+	# reindexing is fixed, we'll keep this code until a SCHEMA_VERSION
+	# bump since reindexing is expensive and users may not do it
+
+	# loose threading could've returned too many results,
+	# put the root the message we care about at the top:
+	my $mid = $ctx->{mid};
+	if (defined($mid) && scalar(@$rootset) > 1) {
+		$ctx->{root_idx} = -1;
+		my $nr = scalar @$msgs;
+		walk_thread($rootset, $ctx, *find_mid_root);
+		my $idx = $ctx->{found_mid_at};
+		if (defined($idx) && $idx != 0) {
+			my $tip = splice(@$rootset, $idx, 1);
+			@$rootset = reverse @$rootset;
+			unshift @$rootset, $tip;
+			$ctx->{sl_note} = strict_loose_note($nr);
+		}
+	}
+	$rootset
 }
 
 sub missing_thread {
@@ -809,11 +913,6 @@ sub missing_thread {
 	PublicInbox::ExtMsg::ext_msg($ctx);
 }
 
-sub _msg_date {
-	my ($hdr) = @_;
-	fmt_ts(msg_datestamp($hdr));
-}
-
 sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }
 
 sub dedupe_subject {
@@ -843,6 +942,10 @@ sub skel_dump {
 	my $cur = $ctx->{cur};
 	my $mid = $smsg->{mid};
 
+	if ($level == 0 && $ctx->{skel_dump_roots}++) {
+		$$dst .= delete $ctx->{sl_note} || '';
+	}
+
 	my $f = ascii_html($smsg->from_name);
 	my $obfs_ibx = $ctx->{-obfs_ibx};
 	obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx;
@@ -861,7 +964,7 @@ sub skel_dump {
 			delete $ctx->{cur};
 			$$dst .= "$d".
 				 "$attr [this message]\n";
-			return;
+			return 1;
 		} else {
 			$ctx->{prev_msg} = $mid;
 		}
@@ -872,7 +975,7 @@ sub skel_dump {
 	# Subject is never undef, this mail was loaded from
 	# our Xapian which would've resulted in '' if it were
 	# really missing (and Filter rejects empty subjects)
-	my @subj = split(/ /, $ctx->{srch}->subject_normalized($smsg->subject));
+	my @subj = split(/ /, subject_normalized($smsg->subject));
 
 	# remove common suffixes from the subject if it matches the previous,
 	# so we do not show redundant text at the end.
@@ -901,6 +1004,7 @@ sub skel_dump {
 		$m = $ctx->{-upfx}.mid_escape($mid).'/';
 	}
 	$$dst .=  $d . "" . $end;
+	1;
 }
 
 sub _skel_ghost {
@@ -926,6 +1030,7 @@ sub _skel_ghost {
 	}
 	my $dst = $ctx->{dst};
 	$$dst .= $d;
+	1;
 }
 
 sub sort_ds {
@@ -939,20 +1044,20 @@ sub sort_ds {
 # returns 200 if done, 404 if not
 sub acc_topic {
 	my ($ctx, $level, $node) = @_;
-	my $srch = $ctx->{srch};
 	my $mid = $node->{id};
 	my $x = $node->{smsg} || $ctx->{-inbox}->smsg_by_mid($mid);
 	my ($subj, $ds);
 	my $topic;
 	if ($x) {
 		$subj = $x->subject;
-		$subj = $srch->subject_normalized($subj);
+		$subj = subject_normalized($subj);
+		$subj = '(no subject)' if $subj eq '';
 		$ds = $x->ds;
 		if ($level == 0) {
 			$topic = [ $ds, 1, { $subj => $mid }, $subj ];
 			$ctx->{-cur_topic} = $topic;
 			push @{$ctx->{order}}, $topic;
-			return;
+			return 1;
 		}
 
 		$topic = $ctx->{-cur_topic}; # should never be undef
@@ -966,11 +1071,12 @@ sub acc_topic {
 		}
 		$seen->{$subj} = $mid; # latest for subject
 	} else { # ghost message
-		return if $level != 0; # ignore child ghosts
+		return 1 if $level != 0; # ignore child ghosts
 		$topic = [ -666, 0, {} ];
 		$ctx->{-cur_topic} = $topic;
 		push @{$ctx->{order}}, $topic;
 	}
+	1;
 }
 
 sub dump_topics {
@@ -984,7 +1090,6 @@ sub dump_topics {
 	my @out;
 	my $ibx = $ctx->{-inbox};
 	my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
-	my $srch = $ctx->{srch};
 
 	# sort by recency, this allows new posts to "bump" old topics...
 	foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) {
@@ -1010,13 +1115,13 @@ sub dump_topics {
 
 		my $mbox = qq(mbox.gz);
 		my $atom = qq(Atom);
-		my $s = "$top\n" .
+		my $s = "$top\n" .
 			" $ds UTC $n - $mbox / $atom\n";
 		for (my $i = 0; $i < scalar(@ex); $i += 2) {
 			my $level = $ex[$i];
 			my $subj = $ex[$i + 1];
 			$mid = delete $seen->{$subj};
-			my @subj = split(/ /, $srch->subject_normalized($subj));
+			my @subj = split(/ /, subject_normalized($subj));
 			my @next_prev = @subj; # full copy
 			my $omit = dedupe_subject($prev_subj, \@subj, ' "');
 			$prev_subj = \@next_prev;
@@ -1032,43 +1137,81 @@ sub dump_topics {
 	200;
 }
 
+sub ts2str ($) {
+	my ($ts) = @_;
+	POSIX::strftime('%Y%m%d%H%M%S', gmtime($ts));
+}
+
+sub str2ts ($) {
+	my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $_[0]);
+	timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
+}
+
+sub pagination_footer ($$) {
+	my ($ctx, $latest) = @_;
+	delete $ctx->{qp} or return;
+	my $next = $ctx->{next_page} || '';
+	my $prev = $ctx->{prev_page} || '';
+	if ($prev) {
+		$next = $next ? "$next " : '     ';
+		$prev .= qq! latest!;
+	}
+	"
page: $next$prev
"; +} + sub index_nav { # callback for WwwStream my (undef, $ctx) = @_; - delete $ctx->{qp} or return; - my ($next, $prev); - $next = $prev = ' '; - my $latest = ''; + pagination_footer($ctx, '.') +} + +sub paginate_recent ($$) { + my ($ctx, $lim) = @_; + my $t = $ctx->{qp}->{t} || ''; + my $opts = { limit => $lim }; + my ($after, $before); - my $next_o = $ctx->{-next_o}; - if ($next_o) { - $next = qq!next!; + # Xapian uses '..' but '-' is perhaps friendier to URL linkifiers + # if only $after exists "YYYYMMDD.." because "." could be skipped + # if interpreted as an end-of-sentence + $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1); + $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1); + + my $ibx = $ctx->{-inbox}; + my $msgs = $ibx->recent($opts, $after, $before); + my $nr = scalar @$msgs; + if ($nr < $lim && defined($after)) { + $after = $before = undef; + $msgs = $ibx->recent($opts); + $nr = scalar @$msgs; } - if (my $cur_o = $ctx->{-cur_o}) { - $latest = qq! latest!; - - my $o = $cur_o - ($next_o - $cur_o); - if ($o > 0) { - $prev = qq!prev!; - } elsif ($o == 0) { - $prev = qq!prev!; + my $more = $nr == $lim; + my ($newest, $oldest); + if ($nr) { + $newest = $msgs->[0]->{ts}; + $oldest = $msgs->[-1]->{ts}; + # if we only had $after, our SQL query in ->recent ordered + if ($newest < $oldest) { + ($oldest, $newest) = ($newest, $oldest); + $more = 0 if defined($after) && $after < $oldest; } } - "
page: $next $prev$latest
"; + if (defined($oldest) && $more) { + my $s = ts2str($oldest); + $ctx->{next_page} = qq!next!; + } + if (defined($newest) && (defined($before) || defined($after))) { + my $s = ts2str($newest); + $ctx->{prev_page} = qq!prev!; + } + $msgs; } sub index_topics { my ($ctx) = @_; - my ($off) = (($ctx->{qp}->{o} || '0') =~ /(\d+)/); - - $ctx->{order} = []; - my $srch = $ctx->{srch}; - my $msgs = $ctx->{-inbox}->recent({offset => $off, limit => 200 }); - my $nr = scalar @$msgs; - if ($nr) { + my $msgs = paginate_recent($ctx, 200); # 200 is our window + if (@$msgs) { walk_thread(thread_results($ctx, $msgs), $ctx, *acc_topic); } - $ctx->{-next_o} = $off + $nr; - $ctx->{-cur_o} = $off; PublicInbox::WwwStream->response($ctx, dump_topics($ctx), *index_nav); }