This should bring us closer to the "Base subject" definition in
IMAP ORDEREDSUBJECT (RFC 5256 2.1). Larger changes may cause
some breakage (until --reindex). But for now, a reindex will
prevent non-ASCII subjects from being normalized to the
same fuzzy "thread" in the thread view.
$tid;
}
-# normalize subjects so they are suitable as pathnames for URLs
-# XXX: consider for removal
+# normalize subjects somewhat, they used to be ASCII-only but now
+# we use \w for UTF-8 support. We may still drop it entirely and
+# rely on Xapian for subject matches...
sub subject_path ($) {
my ($subj) = @_;
$subj = subject_normalized($subj);
- $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
+ $subj =~ s![^\w\.~/\-]+!_!g;
lc($subj);
}
our $REPLY_RE = qr/^re:\s+/i;
+# TODO: see RFC 5256 sec 2.1 "Base Subject" and evaluate compatibility
+# w/ existing indices...
sub subject_normalized ($) {
my ($subj) = @_;
$subj =~ s/\A\s+//s; # no leading space