lib/PublicInbox/Search.pm | 15 ++++++++++++---
lib/PublicInbox/View.pm | 72 +++++++++++++++++++++++++++++++++++++++++++++--------
lib/PublicInbox/WWW.pm | 20 +++++++++++++++++++-
t/search.t | 16 ++++++++--------
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 39b06b0afe6045fcd220eee77133a12144f4d476..f4f00b252ced067b4a7df8c5617638fa47b6855d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -12,7 +12,10 @@ use PublicInbox::MID qw/mid_clean mid_compressed/;
use constant {
TS => 0,
- SCHEMA_VERSION => 0,
+ # SCHEMA_VERSION history
+ # 0 - initial
+ # 1 - subject_path is lower-cased
+ SCHEMA_VERSION => 1,
LANG => 'english',
QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
@@ -207,6 +210,12 @@ my $query = $self->qp->parse_query($query_string, QP_FLAGS);
$query = Search::Xapian::Query->new(OP_AND, $mail_query, $query);
$self->do_enquire($query, $opts);
+}
+
+sub get_subject_path {
+ my ($self, $path, $opts) = @_;
+ my $query = $self->qp->parse_query("path:$path", 0);
+ $self->do_enquire($query);
}
# given a message ID, get replies to a message
@@ -461,13 +470,13 @@ }
# normalize subjects so they are suitable as pathnames for URLs
sub subject_path {
- my ($subj) = @_;
+ my $subj = pop;
$subj =~ s/\A\s+//;
$subj =~ s/\s+\z//;
$subj =~ s/^(?:re|aw):\s*//i; # remove reply prefix (aw: German)
$subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
- $subj;
+ lc($subj);
}
sub do_cat_mail {
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index c40a2a75c8ab119345c5fd7bfdcd97ce74fd2d3d..696d7d5a0984bdad067564d932b2490b51d7b7e3 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -31,7 +31,7 @@ $footer = "\n" . $footer;
} else {
$footer = '';
}
- headers_to_html_header($mime, $full_pfx) .
+ headers_to_html_header($mime, $full_pfx, $srch) .
multipart_text_as_html($mime, $full_pfx) .
'
' . PRE_WRAP .
html_footer($mime, 1, $full_pfx, $srch) . $footer .
@@ -179,6 +179,52 @@
$rv .= "
" . PRE_WRAP . $next . $foot . "";
}
+sub subject_path_html {
+ my (undef, $ctx, $foot, $srch) = @_;
+ my $path = $ctx->{subject_path};
+ my $res = $srch->get_subject_path($path);
+ my $rv = '';
+ require PublicInbox::GitCatFile;
+ my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+ my $nr = scalar @{$res->{msgs}};
+ return $rv if $nr == 0;
+ my @msgs;
+ while (my $smsg = shift @{$res->{msgs}}) {
+ my $m = $smsg->mid;
+
+ # Duplicated from WWW.pm
+ my ($x2, $x38) = ($m =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/);
+
+ unless (defined $x38) {
+ require Digest::SHA;
+ $m = Digest::SHA::sha1_hex($m);
+ ($x2, $x38) = ($m =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/);
+ }
+
+ # FIXME: duplicated code from Feed.pm
+ my $mime = eval {
+ my $str = $git->cat_file("HEAD:$x2/$x38");
+ Email::MIME->new($str);
+ };
+ unless ($@) {
+ my $t = eval { str2time($mime->header('Date')) };
+ defined($t) or $t = 0;
+ $mime->header_set('X-PI-TS', $t);
+ push @msgs, $mime;
+ }
+ }
+ require PublicInbox::Thread;
+ my $th = PublicInbox::Thread->new(@msgs);
+ $th->thread;
+ $th->order(*PublicInbox::Thread::sort_ts);
+ my $state = [ undef, { root_anchor => 'dummy' }, undef, 0 ];
+ thread_entry(\$rv, $state, $_, 0) for $th->rootset;
+ my $final_anchor = $state->[3];
+ my $next = "end of thread\n";
+
+ $rv .= "
" . PRE_WRAP . $next . $foot . "";
+}
+
# only private functions below.
sub index_walk {
@@ -235,7 +281,7 @@ $default;
}
sub multipart_text_as_html {
- my ($mime, $full_pfx) = @_;
+ my ($mime, $full_pfx, $srch) = @_;
my $rv = "";
my $part_nr = 0;
my $enc_msg = enc_for($mime->header("Content-Type"));
@@ -339,7 +385,7 @@ $s;
}
sub headers_to_html_header {
- my ($mime, $full_pfx) = @_;
+ my ($mime, $full_pfx, $srch) = @_;
my $rv = "";
my @title;
@@ -347,18 +393,21 @@ foreach my $h (qw(From To Cc Subject Date)) {
my $v = $mime->header($h);
defined($v) && length($v) or next;
$v = PublicInbox::Hval->new_oneline($v);
- $rv .= "$h: " . $v->as_html . "\n";
if ($h eq 'From') {
my @from = Email::Address->parse($v->raw);
- $v = $from[0]->name;
- unless (defined($v) && length($v)) {
- $v = '<' . $from[0]->address . '>';
- }
- $title[1] = ascii_html($v);
+ $title[1] = ascii_html($from[0]->name);
} elsif ($h eq 'Subject') {
$title[0] = $v->as_html;
+ if ($srch) {
+ my $path = $srch->subject_path($v->raw);
+ $rv .= "$h: ";
+ $rv .= $v->as_html . "\n";
+ next;
+ }
}
+ $rv .= "$h: " . $v->as_html . "\n";
+
}
my $header_obj = $mime->header_obj;
@@ -510,6 +559,9 @@
sub thread_replies {
my ($dst, $root, $res) = @_;
my @msgs = map { $_->mini_mime } @{$res->{msgs}};
+ foreach (@{$res->{msgs}}) {
+ print STDERR "smsg->path: <", $_->path, ">\n";
+ }
require PublicInbox::Thread;
$root->header_set('X-PI-TS', '0');
my $th = PublicInbox::Thread->new($root, @msgs);
@@ -532,7 +584,7 @@ sub thread_entry {
my ($dst, $state, $node, $level) = @_;
# $state = [ $search_res, $seen, undef, 0 (msg_nr) ];
# $seen is overloaded with 3 types of fields:
- # 1) "root" => Message-ID,
+ # 1) "root_anchor" => anchor_for(Message-ID),
# 2) seen subject hashes: sha1(subject) => 1
# 3) anchors hashes: "#$sha1_hex" (same as $seen in index_entry)
if (my $mime = $node->message) {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 52e51c435cfbaf2a4cb6abbff0cde3d58af7dd28..7fe9b85be77dd02f22552e2f8199fd13ca7317d0 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -53,8 +53,13 @@ # thread display
} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx, $cgi);
+ # subject_path display
+ } elsif ($path_info =~ m!$LISTNAME_RE/s/(\S+)\.html\z!o) {
+ my $sp = $2;
+ invalid_list(\%ctx, $1) || get_subject_path(\%ctx, $cgi, $sp);
+
# convenience redirects, order matters
- } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t)/(\S+)\z!o) {
+ } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) {
my $pfx = $2;
invalid_list_mid(\%ctx, $1, $3) ||
redirect_mid(\%ctx, $cgi, $2);
@@ -206,6 +211,19 @@ my $srch = searcher($ctx) or return need_search($ctx);
require PublicInbox::View;
my $foot = footer($ctx);
my $body = PublicInbox::View->thread_html($ctx, $foot, $srch) or
+ return r404();
+ [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
+ [ $body ] ];
+}
+
+# /$LISTNAME/s/$SUBJECT_PATH.html
+sub get_subject_path {
+ my ($ctx, $cgi, $sp) = @_;
+ $ctx->{subject_path} = $sp;
+ my $srch = searcher($ctx) or return need_search($ctx);
+ require PublicInbox::View;
+ my $foot = footer($ctx);
+ my $body = PublicInbox::View->subject_path_html($ctx, $foot, $srch) or
return r404();
[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
[ $body ] ];
diff --git a/t/search.t b/t/search.t
index 9de6d2866652f13527b7987fd8a48f308f0ee1e4..9bdd3cee9964ae0ca03d1b97095a5cca3e79f52f 100644
--- a/t/search.t
+++ b/t/search.t
@@ -22,7 +22,7 @@ {
my $root = Email::MIME->create(
header_str => [
Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
- Subject => 'hello world',
+ Subject => 'Hello world',
'Message-ID' => '',
From => 'John Smith ',
To => 'list@example.com',
@@ -31,7 +31,7 @@ body => "\\m/\n");
my $last = Email::MIME->create(
header_str => [
Date => 'Sat, 02 Oct 2010 00:00:00 +0000',
- Subject => 'Re: hello world',
+ Subject => 'Re: Hello world',
'In-Reply-To' => '',
'Message-ID' => '',
From => 'John Smith ',
@@ -70,19 +70,19 @@ $res = $ro->query("path:$p");
is($res->{count}, 0, "path variant `$p' does not match");
}
- $res = $ro->query('subject:(hello world)');
+ $res = $ro->query('subject:(Hello world)');
@res = filter_mids($res);
is_deeply(\@res, \@exp, 'got expected results for subject:() match');
- $res = $ro->query('subject:"hello world"');
+ $res = $ro->query('subject:"Hello world"');
@res = filter_mids($res);
is_deeply(\@res, \@exp, 'got expected results for subject:"" match');
- $res = $ro->query('subject:"hello world"', {limit => 1});
+ $res = $ro->query('subject:"Hello world"', {limit => 1});
is(scalar @{$res->{msgs}}, 1, "limit works");
my $first = $res->{msgs}->[0];
- $res = $ro->query('subject:"hello world"', {offset => 1});
+ $res = $ro->query('subject:"Hello world"', {offset => 1});
is(scalar @{$res->{msgs}}, 1, "offset works");
my $second = $res->{msgs}->[0];
@@ -207,7 +207,7 @@ $rw->reopen;
$rw->add_message(Email::MIME->create(
header_str => [
Date => 'Sat, 02 Oct 2010 00:00:01 +0000',
- Subject => 'hello',
+ Subject => 'Hello',
'Message-ID' => '',
From => 'Quoter ',
To => 'list@example.com',
@@ -217,7 +217,7 @@
$rw->add_message(Email::MIME->create(
header_str => [
Date => 'Sat, 02 Oct 2010 00:00:02 +0000',
- Subject => 'hello',
+ Subject => 'Hello',
'Message-ID' => '',
From => 'Non-Quoter',
To => 'list@example.com',