From: Eric Wong (Contractor, The Linux Foundation) Date: Tue, 3 Apr 2018 11:09:10 +0000 (+0000) Subject: mbox: remove remaining OFFSET usage in SQLite X-Git-Tag: v1.1.0-pre1~68 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=0dceebd0a85774c92af247e6da5e2f5a0ee8417c mbox: remove remaining OFFSET usage in SQLite We can use id_batch in the common case to speed up full mbox retrievals. Gigantic msets are still a problem, but will be fixed in future commits. --- diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 05de6be1..0be19685 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -138,8 +138,12 @@ sub thread_mbox { my ($ctx, $srch, $sfx) = @_; eval { require IO::Compress::Gzip }; return sub { need_gzip(@_) } if $@; - - my $cb = sub { $srch->get_thread($ctx->{mid}, @_) }; + my $prev = 0; + my $cb = sub { + my $msgs = $srch->get_thread($ctx->{mid}, $prev); + $prev = $msgs->[-1]->{num} if scalar(@$msgs); + $msgs; + }; PublicInbox::MboxGz->response($ctx, $cb); } @@ -160,7 +164,25 @@ sub mbox_all { eval { require IO::Compress::Gzip }; return sub { need_gzip(@_) } if $@; - my $cb = sub { $ctx->{srch}->query($query, @_) }; + if ($query eq '') { + my $prev = 0; + my $msgs = []; + my $cb = sub { + $ctx->{-inbox}->mm->id_batch($prev, sub { + $msgs = $_[0]; + }); + $prev = $msgs->[-1] if @$msgs; + $msgs; + }; + return PublicInbox::MboxGz->response($ctx, $cb, 'all'); + } + my $opts = { offset => 0 }; + my $srch = $ctx->{srch}; + my $cb = sub { # called by MboxGz->getline + my $msgs = $srch->query($query, $opts); + $opts->{offset} += scalar @$msgs; + $msgs; + }; PublicInbox::MboxGz->response($ctx, $cb, 'results-'.$query); } @@ -192,7 +214,6 @@ sub new { cb => $cb, ctx => $ctx, msgs => [], - opts => { offset => 0 }, }, $class; } @@ -223,6 +244,10 @@ sub getline { do { # work on existing result set while (defined(my $smsg = shift @$msgs)) { + # id_batch may return integers + ref($smsg) or + $smsg = $ctx->{srch}->{over_ro}->get_art($smsg); + my $msg = eval { $ibx->msg_by_smsg($smsg) } or next; $msg = Email::Simple->new($msg); $gz->write(PublicInbox::Mbox::msg_str($ctx, $msg, @@ -247,10 +272,10 @@ sub getline { } # refill result set - $msgs = $self->{msgs} = $self->{cb}->($self->{opts}); - $self->{opts}->{offset} += scalar @$msgs; + $msgs = $self->{msgs} = $self->{cb}->(); } while (@$msgs); $gz->close; + # signal that we're done and can return undef next call: delete $self->{ctx}; ${delete $self->{buf}}; } diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index b230d44a..0bd6008b 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -50,9 +50,7 @@ sub do_get { my ($self, $sql, $opts, @args) = @_; my $dbh = $self->connect; my $lim = (($opts->{limit} || 0) + 0) || 1000; - my $off = (($opts->{offset} || 0) + 0) || 0; $sql .= "LIMIT $lim"; - $sql .= " OFFSET $off" if $off > 0; my $msgs = $dbh->selectall_arrayref($sql, { Slice => {} }, @args); load_from_row($_) for @$msgs; $msgs @@ -77,7 +75,7 @@ ORDER BY num ASC sub nothing () { wantarray ? (0, []) : [] }; sub get_thread { - my ($self, $mid, $opts) = @_; + my ($self, $mid, $prev) = @_; my $dbh = $self->connect; my $id = $dbh->selectrow_array(<<'', undef, $mid); @@ -96,13 +94,14 @@ SELECT tid,sid FROM over WHERE num = ? LIMIT 1 defined $tid or return nothing; # $sid may be undef - my $cond = 'FROM over WHERE (tid = ? OR sid = ?) AND num > 0'; - my $msgs = do_get($self, <<"", $opts, $tid, $sid); -SELECT * $cond ORDER BY ts ASC + $prev ||= 0; + my $cond = 'FROM over WHERE (tid = ? OR sid = ?) AND num > ?'; + my $msgs = do_get($self, <<"", {}, $tid, $sid, $prev); +SELECT * $cond ORDER BY num ASC return $msgs unless wantarray; - my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid); + my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid, $prev); SELECT COUNT(num) $cond ($nr, $msgs); diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index f7fdf854..eca2b0f5 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -179,8 +179,8 @@ sub query { } sub get_thread { - my ($self, $mid, $opts) = @_; - $self->{over_ro}->get_thread($mid, $opts); + my ($self, $mid, $prev) = @_; + $self->{over_ro}->get_thread($mid, $prev); } sub retry_reopen { diff --git a/t/psgi_v2.t b/t/psgi_v2.t index 31c4178b..aa3279cc 100644 --- a/t/psgi_v2.t +++ b/t/psgi_v2.t @@ -125,8 +125,28 @@ test_psgi(sub { $www->call(@_) }, sub { like($out, qr/^hello world$/m, 'got first in t.mbox.gz'); like($out, qr/^hello world!$/m, 'got second in t.mbox.gz'); like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz'); - @from_ = ($raw =~ m/^From /mg); + @from_ = ($out =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz'); + + # search interface + $res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m')); + $in = $res->content; + $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); + like($out, qr/^hello world$/m, 'got first in mbox POST'); + like($out, qr/^hello world!$/m, 'got second in mbox POST'); + like($out, qr/^hello ghosts$/m, 'got third in mbox POST'); + @from_ = ($out =~ m/^From /mg); + is(scalar(@from_), 3, 'three From_ lines in mbox POST'); + + # all.mbox.gz interface + $res = $cb->(GET('/v2test/all.mbox.gz')); + $in = $res->content; + $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); + like($out, qr/^hello world$/m, 'got first in all.mbox'); + like($out, qr/^hello world!$/m, 'got second in all.mbox'); + like($out, qr/^hello ghosts$/m, 'got third in all.mbox'); + @from_ = ($out =~ m/^From /mg); + is(scalar(@from_), 3, 'three From_ lines in all.mbox'); }; local $SIG{__WARN__} = 'DEFAULT';