X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FOverIdx.pm;h=d6d706f7fed04363d409eb7aa34564f08ad7dc2e;hb=ed0167d2a851b4f5128f57ad60309a0b76e62cfa;hp=8f7cf2bb97228b49f4b653ee94ecfa90d74dc3f4;hpb=8ef622d408d2e4d98ad3aada8466f539c9ac61ba;p=public-inbox.git diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 8f7cf2bb..d6d706f7 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -158,7 +158,8 @@ SELECT $cols FROM over WHERE over.num = ? LIMIT 1 foreach (@$nums) { $sth->execute($_->[0]); - my $smsg = $sth->fetchrow_hashref; + # $cb may delete rows and invalidate nums + my $smsg = $sth->fetchrow_hashref // next; $smsg = PublicInbox::Over::load_from_row($smsg); $cb->($self, $smsg, @arg) or return; } @@ -243,12 +244,13 @@ sub link_refs { $tid; } -# normalize subjects so they are suitable as pathnames for URLs -# XXX: consider for removal +# normalize subjects somewhat, they used to be ASCII-only but now +# we use \w for UTF-8 support. We may still drop it entirely and +# rely on Xapian for subject matches... sub subject_path ($) { my ($subj) = @_; $subj = subject_normalized($subj); - $subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g; + $subj =~ s![^\w\.~/\-]+!_!g; lc($subj); } @@ -463,8 +465,8 @@ sub create { }; unless (-r $fn) { require File::Path; - require File::Basename; - File::Path::mkpath(File::Basename::dirname($fn)); + my ($dir) = ($fn =~ m!(.*?/)[^/]+\z!); + File::Path::mkpath($dir); } # create the DB: PublicInbox::Over::dbh($self); @@ -541,9 +543,13 @@ CREATE TABLE IF NOT EXISTS xref3 ( $dbh->do('CREATE INDEX IF NOT EXISTS idx_docid ON xref3 (docid)'); # performance critical, this is not UNIQUE since we may need to - # tolerate some old bugs from indexing mirrors - $dbh->do('CREATE INDEX IF NOT EXISTS idx_nntp ON '. - 'xref3 (oidbin,xnum,ibx_id)'); + # tolerate some old bugs from indexing mirrors. n.b. we used + # to index oidbin here, but leaving it out speeds up reindexing + # and "XHDR Xref <$MSGID>" isn't any slower w/o oidbin + $dbh->do('CREATE INDEX IF NOT EXISTS idx_reindex ON '. + 'xref3 (xnum,ibx_id)'); + + $dbh->do('CREATE INDEX IF NOT EXISTS idx_oidbin ON xref3 (oidbin)'); $dbh->do(<<''); CREATE TABLE IF NOT EXISTS eidx_meta ( @@ -600,50 +606,6 @@ INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?) $sth->execute; } -# returns remaining reference count to $docid -sub remove_xref3 { - my ($self, $docid, $oidhex, $eidx_key, $rm_eidx_info) = @_; - begin_lazy($self); - my $oidbin = pack('H*', $oidhex); - my ($sth, $ibx_id); - if (defined $eidx_key) { - $ibx_id = ibx_id($self, $eidx_key); - $sth = $self->{dbh}->prepare_cached(<<''); -DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? AND oidbin = ? - - $sth->bind_param(1, $docid); - $sth->bind_param(2, $ibx_id); - $sth->bind_param(3, $oidbin, SQL_BLOB); - } else { - $sth = $self->{dbh}->prepare_cached(<<''); -DELETE FROM xref3 WHERE docid = ? AND oidbin = ? - - $sth->bind_param(1, $docid); - $sth->bind_param(2, $oidbin, SQL_BLOB); - } - $sth->execute; - $sth = $self->{dbh}->prepare_cached(<<'', undef, 1); -SELECT COUNT(*) FROM xref3 WHERE docid = ? - - $sth->execute($docid); - my $nr = $sth->fetchrow_array; - if ($nr == 0) { - delete_by_num($self, $docid); - } elsif (defined($ibx_id) && $rm_eidx_info) { - # if deduplication rules in ContentHash change, it's - # possible a docid can have multiple rows with the - # same ibx_id. This governs whether or not we call - # ->shard_remove_eidx_info in ExtSearchIdx. - $sth = $self->{dbh}->prepare_cached(<<'', undef, 1); -SELECT COUNT(*) FROM xref3 WHERE docid = ? AND ibx_id = ? - - $sth->execute($docid, $ibx_id); - my $count = $sth->fetchrow_array; - $$rm_eidx_info = ($count == 0); - } - $nr; -} - # for when an xref3 goes missing, this does NOT update {ts} sub update_blob { my ($self, $smsg, $oidhex) = @_;