Sergey Matveev's repositories - public-inbox.git/blobdiff - lib/PublicInbox/OverIdx.pm
overidx: each_by_mid: account for messages being deleted
[public-inbox.git] / lib / PublicInbox / OverIdx.pm
index e0893337e7843461740ad607ecff7aa5e7dd9b6f..985abbf4e693de8f01939556abca07504fa21eae 100644 (file)
@@ -158,7 +158,8 @@ SELECT $cols FROM over WHERE over.num = ? LIMIT 1
 
                foreach (@$nums) {
                        $sth->execute($_->[0]);
-                       my $smsg = $sth->fetchrow_hashref;
+                       # $cb may delete rows and invalidate nums
+                       my $smsg = $sth->fetchrow_hashref // next;
                        $smsg = PublicInbox::Over::load_from_row($smsg);
                        $cb->($self, $smsg, @arg) or return;
                }
@@ -243,12 +244,13 @@ sub link_refs {
        $tid;
 }
 
-# normalize subjects so they are suitable as pathnames for URLs
-# XXX: consider for removal
+# normalize subjects somewhat, they used to be ASCII-only but now
+# we use \w for UTF-8 support.  We may still drop it entirely and
+# rely on Xapian for subject matches...
 sub subject_path ($) {
        my ($subj) = @_;
        $subj = subject_normalized($subj);
-       $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
+       $subj =~ s![^\w\.~/\-]+!_!g;
        lc($subj);
 }
 
@@ -633,7 +635,7 @@ SELECT COUNT(*) FROM xref3 WHERE docid = ?
                # if deduplication rules in ContentHash change, it's
                # possible a docid can have multiple rows with the
                # same ibx_id.  This governs whether or not we call
-               # ->shard_remove_eidx_info in ExtSearchIdx.
+               # ->remove_eidx_info in ExtSearchIdx.
                $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
 SELECT COUNT(*) FROM xref3 WHERE docid = ? AND ibx_id = ?