From: Eric Wong Date: Thu, 20 Aug 2020 20:24:49 +0000 (+0000) Subject: searchview: use over.sqlite3 instead of Xapian docdata X-Git-Tag: v1.6.0~119 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=c703745bcd09af545ef1bd320c895778081488d2 searchview: use over.sqlite3 instead of Xapian docdata This is a step towards improving kernel page cache hit rates by relying on over.sqlite3 for document data instead of Xapian. Some micro-optimization to over->get_art was required to maintain performance. --- diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 81b9fca7..80e57e62 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -57,6 +57,7 @@ sub new { sub disconnect { my ($self) = @_; if (my $dbh = delete $self->{dbh}) { + delete $self->{-get_art}; $self->{filename} = $dbh->sqlite_db_filename; } } @@ -201,8 +202,8 @@ SELECT COUNT(num) FROM over WHERE num > 0 sub get_art { my ($self, $num) = @_; - my $dbh = $self->connect; - my $sth = $dbh->prepare_cached(<<'', undef, 1); + # caching $sth ourselves is faster than prepare_cached + my $sth = $self->{-get_art} //= $self->connect->prepare(<<''); SELECT num,ds,ts,ddd FROM over WHERE num = ? LIMIT 1 $sth->execute($num); @@ -230,13 +231,7 @@ ORDER BY num ASC LIMIT 1 $sth->execute($$id, $$prev); my $num = $sth->fetchrow_array or return; $$prev = $num; - - $sth = $dbh->prepare_cached(<<"", undef, 1); -SELECT num,ts,ds,ddd FROM over WHERE num = ? LIMIT 1 - - $sth->execute($num); - my $smsg = $sth->fetchrow_hashref or return; - load_from_row($smsg); + get_art($self, $num); } # IMAP search, this is limited by callers to UID_SLICE size (50K) @@ -278,10 +273,7 @@ sub check_inodes { my $st = pack('dd', $st[0], $st[1]); # don't actually reopen, just let {dbh} be recreated later - if ($st ne ($self->{st} // $st)) { - delete($self->{dbh}); - $self->{filename} = $f; - } + disconnect($self) if $st ne ($self->{st} // $st); } else { warn "W: stat $f: $!\n"; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 28d9ce5d..61534c25 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -13,6 +13,7 @@ use PublicInbox::WwwAtomStream; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::SearchThread; use PublicInbox::SearchQuery; +use PublicInbox::Search qw(mdocid); my %rmap_inc; sub mbox_results { @@ -90,19 +91,22 @@ sub mset_summary { my $pfx = ' ' x $pad; my $res = \($ctx->{-html_tip}); my $ibx = $ctx->{-inbox}; - my $srch = $ibx->search; + my $over = $ibx->over; + my $nshard = $ibx->search->{nshard} // 1; my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; foreach my $m ($mset->items) { my $rank = sprintf("%${pad}d", $m->get_rank + 1); my $pct = get_pct($m); - my $smsg = PublicInbox::Smsg::from_mitem($m, $srch); + my $num = mdocid($nshard, $m); + my $smsg = $over->get_art($num, 1); unless ($smsg) { eval { - $m = "$m ".$m->get_docid . " expired\n"; + $m = "$m $num expired\n"; $ctx->{env}->{'psgi.errors'}->print($m); }; next; } + PublicInbox::Smsg::psgi_cull($smsg); my $s = ascii_html($smsg->{subject}); my $f = ascii_html($smsg->{from_name}); if ($obfs_ibx) {