Sergey Matveev's repositories - public-inbox.git/blobdiff - lib/PublicInbox/OverIdx.pm
overidx: subject_path: allow non-ASCII char in subject matches
[public-inbox.git] / lib / PublicInbox / OverIdx.pm
index 2e3d4534f125d92383f3e05e03fcf0ad10b6e3a1..0c8a4d9ee3f846d67cd514a41e1cbe8e916950be 100644 (file)
@@ -243,12 +243,13 @@ sub link_refs {
        $tid;
 }
 
-# normalize subjects so they are suitable as pathnames for URLs
-# XXX: consider for removal
+# normalize subjects somewhat, they used to be ASCII-only but now
+# we use \w for UTF-8 support.  We may still drop it entirely and
+# rely on Xapian for subject matches...
 sub subject_path ($) {
        my ($subj) = @_;
        $subj = subject_normalized($subj);
-       $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
+       $subj =~ s![^\w\.~/\-]+!_!g;
        lc($subj);
 }