sqlite storage: Track total blob data size manually

author Matt Joiner <anacrolix@gmail.com>

Tue, 3 Nov 2020 02:10:17 +0000 (13:10 +1100)

committer Matt Joiner <anacrolix@gmail.com>

Tue, 3 Nov 2020 02:10:17 +0000 (13:10 +1100)
author Matt Joiner <anacrolix@gmail.com>
Tue, 3 Nov 2020 02:10:17 +0000 (13:10 +1100)
committer Matt Joiner <anacrolix@gmail.com>
Tue, 3 Nov 2020 02:10:17 +0000 (13:10 +1100)
diff --git a/storage/sqlite/sqlite-storage.go b/storage/sqlite/sqlite-storage.go

index 295daa3fcb6fd16c7f3b80e6c54dbec3ba5e6ff4..44af1fdf6537b5f981ddd086c8210b49a9bffbad 100644 (file)
--- a/storage/sqlite/sqlite-storage.go
+++ b/storage/sqlite/sqlite-storage.go
@@ -23,7 +23,13 @@ import (
  type conn = *sqlite.Conn
  
  func initConn(conn conn, wal bool) error {
-       err := sqlitex.ExecTransient(conn, `pragma synchronous=off`, nil)
+       // Recursive triggers are required because we need to trim the blob_meta size after trimming to
+       // capacity. Hopefully we don't hit the recursion limit, and if we do, there's an error thrown.
+       err := sqlitex.ExecTransient(conn, "pragma recursive_triggers=on", nil)
+       if err != nil {
+               return err
+       }
+       err = sqlitex.ExecTransient(conn, `pragma synchronous=off`, nil)
         if err != nil {
                 return err
         }
@@ -42,48 +48,81 @@ func initConn(conn conn, wal bool) error {
  
  func initSchema(conn conn) error {
         return sqlitex.ExecScript(conn, `
+-- We have to opt into this before creating any tables, or before a vacuum to enable it. It means we
+-- can trim the database file size with partial vacuums without having to do a full vacuum, which 
+-- locks everything.
  pragma auto_vacuum=incremental;
  
-create table if not exists blob(
+create table if not exists blob (
         name text,
         last_used timestamp default (datetime('now')),
         data blob,
         primary key (name)
  );
  
-create table if not exists setting(
+create table if not exists blob_meta (
+       key text primary key,
+       value
+);
+
+-- While sqlite *seems* to be faster to get sum(length(data)) instead of 
+-- sum(length(cast(data as blob))), it may still require a large table scan at start-up or with a 
+-- cold-cache. With this we can be assured that it doesn't.
+insert or ignore into blob_meta values ('size', 0);
+
+create table if not exists setting (
         name primary key on conflict replace,
         value
  );
  
  create view if not exists deletable_blob as
-with recursive excess(
+with recursive excess (
         usage_with,
         last_used,
         blob_rowid,
         data_length
  ) as (
-       select * from (select (select sum(length(cast(data as blob))) from blob) as usage_with, last_used, rowid, length(cast(data as blob)) from blob order by last_used, rowid limit 1)
-               where usage_with >= (select value from setting where name='capacity')
+       select * 
+       from (
+               select 
+                       (select value from blob_meta where key='size') as usage_with,
+                       last_used,
+                       rowid,
+                       length(cast(data as blob))
+               from blob order by last_used, rowid limit 1
+       )
+       where usage_with >= (select value from setting where name='capacity')
         union all
-       select usage_with-data_length, blob.last_used, blob.rowid, length(cast(data as blob)) from excess join blob
-               on blob.rowid=(select rowid from blob where (last_used, rowid) > (excess.last_used, blob_rowid))
+       select 
+               usage_with-data_length,
+               blob.last_used,
+               blob.rowid,
+               length(cast(data as blob))
+       from excess join blob
+       on blob.rowid=(select rowid from blob where (last_used, rowid) > (excess.last_used, blob_rowid))
         where usage_with >= (select value from setting where name='capacity')
-) select * from excess;
+)
+select * from excess;
  
-create trigger if not exists trim_blobs_to_capacity_after_update
-after update of data on blob
-when length(new.data)>length(old.data) and (select sum(length(cast(data as blob))) from blob)>(select value from setting where name='capacity')
-begin 
+create trigger if not exists after_insert_blob
+after insert on blob
+begin
+       update blob_meta set value=value+length(cast(new.data as blob)) where key='size';
         delete from blob where rowid in (select blob_rowid from deletable_blob);
  end;
  
-create trigger if not exists trim_blobs_to_capacity_after_insert 
-after insert on blob
-when (select sum(length(cast(data as blob))) from blob)>(select value from setting where name='capacity')
+create trigger if not exists after_update_blob
+after update of data on blob
  begin
+       update blob_meta set value=value+length(cast(new.data as blob))-length(cast(old.data as blob)) where key='size';
         delete from blob where rowid in (select blob_rowid from deletable_blob);
  end;
+
+create trigger if not exists after_delete_blob
+after delete on blob
+begin
+       update blob_meta set value=value-length(cast(old.data as blob)) where key='size';
+end;
  `)
  }
author	Matt Joiner <anacrolix@gmail.com>
	Tue, 3 Nov 2020 02:10:17 +0000 (13:10 +1100)
committer	Matt Joiner <anacrolix@gmail.com>
	Tue, 3 Nov 2020 02:10:17 +0000 (13:10 +1100)