Sergey Matveev's repositories - glocate.git/commitdiff
Return zstd back
author: Sergey Matveev <stargrave@stargrave.org>
Mon, 8 Aug 2022 19:31:58 +0000 (22:31 +0300)
committer: Sergey Matveev <stargrave@stargrave.org>
Tue, 9 Aug 2022 13:34:25 +0000 (16:34 +0300)
README
diff.go
dirsizer.go
go.mod
go.sum
index.go
main.go
reader.go
writer.go

diff --git a/README b/README
index 358d5302a98ae9b877e6acf1c950d413e096a065959e58f8d3a631cabf2c1a37..8c52b350ce33c6e2b4f83ae2dd4fc980b342b8d3fa0b79ac2917988101f77477 100644 (file)
--- a/README
+++ b/README
@@ -18,7 +18,7 @@ memory, but that takes 2-3 GiBs of memory, that is huge amount. Moreover
 it fully loads it to perform any basic searches. So current
 implementation uses temporary files and heavy use of data streaming.
 
 it fully loads it to perform any basic searches. So current
 implementation uses temporary files and heavy use of data streaming.
 
-Its storage format is trivial:
+Its storage format is simple: Zstandard-compressed list of records:
 
 * 16-bit BE size of the following name
 * entity (file, directory, symbolic link, etc) name itself.
 
 * 16-bit BE size of the following name
 * entity (file, directory, symbolic link, etc) name itself.
diff --git a/diff.go b/diff.go
index 61ba1ac9f83e9f80cb3ac2a7b81e88079e1e9a2d3a8e8afabd34f0485b739d9f..dda4fd9ded52ee8109d30cbf1974527600de590be43819023d0df4ab8fbb91f4 100644 (file)
--- a/diff.go
+++ b/diff.go
@@ -105,7 +105,7 @@ func updateWithDiff(dbPath, strip string) *os.File {
        mods := make([]*Ent, 0, len(modsNames)+len(rens))
        if len(rens) > 0 {
                sort.Sort(BySrc(rens))
        mods := make([]*Ent, 0, len(modsNames)+len(rens))
        if len(rens) > 0 {
                sort.Sort(BySrc(rens))
-               go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+               go reader(db, entsReader)
                var ent Ent
                var ok, met bool
                for {
                var ent Ent
                var ok, met bool
                for {
@@ -183,7 +183,7 @@ func updateWithDiff(dbPath, strip string) *os.File {
        entsReader = make(chan Ent, 1<<10)
        entsDirSizer := make(chan Ent, 1<<10)
        entsWriter := make(chan Ent, 1<<10)
        entsReader = make(chan Ent, 1<<10)
        entsDirSizer := make(chan Ent, 1<<10)
        entsWriter := make(chan Ent, 1<<10)
-       go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+       go reader(db, entsReader)
 
        dirSizerJob := make(chan struct{})
        var dirSizes []int64
 
        dirSizerJob := make(chan struct{})
        var dirSizes []int64
@@ -193,10 +193,9 @@ func updateWithDiff(dbPath, strip string) *os.File {
                close(dirSizerJob)
        }()
 
                close(dirSizerJob)
        }()
 
-       bw := bufio.NewWriterSize(tmp0, 1<<17)
        writerJob := make(chan struct{})
        go func() {
        writerJob := make(chan struct{})
        go func() {
-               writer(bw, entsWriter)
+               writer(tmp0, entsWriter)
                close(writerJob)
        }()
 
                close(writerJob)
        }()
 
@@ -243,9 +242,6 @@ func updateWithDiff(dbPath, strip string) *os.File {
        <-dirSizerJob
        close(entsWriter)
        <-writerJob
        <-dirSizerJob
        close(entsWriter)
        <-writerJob
-       if err = bw.Flush(); err != nil {
-               log.Fatalln(err)
-       }
 
        tmp1 := applyDirSizes(tmp0, dirSizes)
        tmp0.Close()
 
        tmp1 := applyDirSizes(tmp0, dirSizes)
        tmp0.Close()
index b4f5b4e51c46905addae4a0e5a061a445d23f1a3d6e0e6ce0967c2f36d221a93..fda37f4fe46098cf687690e5e03984aba77f952ed5814359e94deefcccbbbdb3 100644 (file)
@@ -6,6 +6,8 @@ import (
        "io"
        "log"
        "os"
        "io"
        "log"
        "os"
+
+       "github.com/klauspost/compress/zstd"
 )
 
 func dirSizer(dirSizes *[]int64, depth int, sinkBack, sinkIn, sinkOut chan Ent) (curSize int64) {
 )
 
 func dirSizer(dirSizes *[]int64, depth int, sinkBack, sinkIn, sinkOut chan Ent) (curSize int64) {
@@ -51,11 +53,22 @@ func applyDirSizes(src *os.File, dirSizes []int64) *os.File {
                log.Fatalln(err)
        }
 
                log.Fatalln(err)
        }
 
-       br := bufio.NewReaderSize(src, 1<<17)
+       compR, err := zstd.NewReader(src)
+       if err != nil {
+               log.Fatalln(err)
+       }
+       br := bufio.NewReaderSize(compR, 1<<17)
+
+       compW, err := zstd.NewWriter(tmp,
+               zstd.WithEncoderLevel(zstd.SpeedBestCompression))
+       if err != nil {
+               log.Fatalln(err)
+       }
+       bw := bufio.NewWriterSize(compW, 1<<17)
+
        num := make([]byte, 8)
        var nameLen int
        name := make([]byte, 0, 1<<16)
        num := make([]byte, 8)
        var nameLen int
        name := make([]byte, 0, 1<<16)
-       bw := bufio.NewWriterSize(tmp, 1<<17)
        var dirIdx int
        for {
                if _, err = io.ReadFull(br, num[:2]); err != nil {
        var dirIdx int
        for {
                if _, err = io.ReadFull(br, num[:2]); err != nil {
@@ -67,9 +80,7 @@ func applyDirSizes(src *os.File, dirSizes []int64) *os.File {
                mustWrite(bw, num[:2])
                nameLen = int(binary.BigEndian.Uint16(num[:2]))
                name = name[:nameLen]
                mustWrite(bw, num[:2])
                nameLen = int(binary.BigEndian.Uint16(num[:2]))
                name = name[:nameLen]
-               if _, err = io.ReadFull(br, name); err != nil {
-                       log.Fatalln(err)
-               }
+               mustReadFull(br, name)
                mustWrite(bw, name)
                if _, err = io.CopyN(bw, br, 1+8); err != nil {
                        log.Fatalln(err)
                mustWrite(bw, name)
                if _, err = io.CopyN(bw, br, 1+8); err != nil {
                        log.Fatalln(err)
@@ -90,5 +101,9 @@ func applyDirSizes(src *os.File, dirSizes []int64) *os.File {
        if err = bw.Flush(); err != nil {
                log.Fatalln(err)
        }
        if err = bw.Flush(); err != nil {
                log.Fatalln(err)
        }
+       if err = compW.Close(); err != nil {
+               log.Fatalln(err)
+       }
+       compR.Close()
        return tmp
 }
        return tmp
 }
diff --git a/go.mod b/go.mod
index f8654fc71f5b20da772216ab5c99b8f3d5c0d4e6f80285b811eb9ef3a932a85b..7d95a307792df055072c49908b254f1d7104f6872fe699f79df78e2f5f315f2c 100644 (file)
--- a/go.mod
+++ b/go.mod
@@ -3,3 +3,5 @@ module go.stargrave.org/glocate
 go 1.18
 
 require github.com/dustin/go-humanize v1.0.0
 go 1.18
 
 require github.com/dustin/go-humanize v1.0.0
+
+require github.com/klauspost/compress v1.15.9
diff --git a/go.sum b/go.sum
index 4f89ea40df43dd72a9ee4bb17b5304a77cd360409cd05cc9431c6210cfbb63c5..568acaac67a8ceacd643b3927d665ea337dccc412d49088ad5449810553ca241 100644 (file)
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,4 @@
 github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
+github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
+github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
index c6fa5398b74fb4ace31a4f09587b8579e3b9642d55040d46e92a1f37fe5e99fb..c05e4c0b84d35c2d6d49beede0faf14c8f985fd98f2d42c1bf11a336f2f49381 100644 (file)
--- a/index.go
+++ b/index.go
@@ -1,7 +1,6 @@
 package main
 
 import (
 package main
 
 import (
-       "bufio"
        "fmt"
        "log"
        "os"
        "fmt"
        "log"
        "os"
@@ -25,10 +24,9 @@ func index() *os.File {
                close(dirSizerJob)
        }()
 
                close(dirSizerJob)
        }()
 
-       bw := bufio.NewWriterSize(tmp0, 1<<17)
        writerJob := make(chan struct{})
        go func() {
        writerJob := make(chan struct{})
        go func() {
-               writer(bw, entsWriter)
+               writer(tmp0, entsWriter)
                close(writerJob)
        }()
 
                close(writerJob)
        }()
 
@@ -45,9 +43,6 @@ func index() *os.File {
        <-dirSizerJob
        close(entsWriter)
        <-writerJob
        <-dirSizerJob
        close(entsWriter)
        <-writerJob
-       if err = bw.Flush(); err != nil {
-               log.Fatalln(err)
-       }
 
        tmp1 := applyDirSizes(tmp0, dirSizes)
        tmp0.Close()
 
        tmp1 := applyDirSizes(tmp0, dirSizes)
        tmp0.Close()
diff --git a/main.go b/main.go
index a5fcb41451e9410c3be3d629ba89dea6d354c675abe9fd2fa7e0879514b10095..c8615fdd85d4f2b76db67f755348feb0a9baf32c17bfdab0b72473fdb634dd58 100644 (file)
--- a/main.go
+++ b/main.go
@@ -1,7 +1,6 @@
 package main
 
 import (
 package main
 
 import (
-       "bufio"
        "flag"
        "log"
        "os"
        "flag"
        "log"
        "os"
@@ -64,7 +63,7 @@ func main() {
                log.Fatalln(err)
        }
        entsReader := make(chan Ent, 1<<10)
                log.Fatalln(err)
        }
        entsReader := make(chan Ent, 1<<10)
-       go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+       go reader(db, entsReader)
 
        entsPrinter := make(chan Ent, 1<<10)
        printerJob := make(chan struct{})
 
        entsPrinter := make(chan Ent, 1<<10)
        printerJob := make(chan struct{})
index 269fc3295dac975ee17b8a4007c9995da5813562f989a1ff4ce3f92366daa7eb..8900c26a75b3fd91ba83e1d45f9d923436981f3e84a45ea784cb151c474d50b4 100644 (file)
--- a/reader.go
+++ b/reader.go
@@ -1,9 +1,12 @@
 package main
 
 import (
 package main
 
 import (
+       "bufio"
        "encoding/binary"
        "io"
        "log"
        "encoding/binary"
        "io"
        "log"
+
+       "github.com/klauspost/compress/zstd"
 )
 
 func mustReadFull(r io.Reader, buf []byte) {
 )
 
 func mustReadFull(r io.Reader, buf []byte) {
@@ -12,15 +15,20 @@ func mustReadFull(r io.Reader, buf []byte) {
        }
 }
 
        }
 }
 
-func reader(r io.Reader, sink chan Ent) {
-       var err error
+func reader(src io.Reader, sink chan Ent) {
+       comp, err := zstd.NewReader(src)
+       if err != nil {
+               log.Fatalln(err)
+       }
+       br := bufio.NewReaderSize(comp, 1<<17)
+
        num := make([]byte, 8)
        var cols []string
        var namePrev string
        var nameLen uint16
        var depth, depthPrev uint8
        for {
        num := make([]byte, 8)
        var cols []string
        var namePrev string
        var nameLen uint16
        var depth, depthPrev uint8
        for {
-               _, err = io.ReadFull(r, num[:2])
+               _, err = io.ReadFull(br, num[:2])
                if err != nil {
                        if err == io.EOF {
                                break
                if err != nil {
                        if err == io.EOF {
                                break
@@ -29,13 +37,13 @@ func reader(r io.Reader, sink chan Ent) {
                }
                nameLen = binary.BigEndian.Uint16(num[:2])
                nameRaw := make([]byte, nameLen)
                }
                nameLen = binary.BigEndian.Uint16(num[:2])
                nameRaw := make([]byte, nameLen)
-               mustReadFull(r, nameRaw)
+               mustReadFull(br, nameRaw)
                name := string(nameRaw)
                name := string(nameRaw)
-               mustReadFull(r, num[:1])
+               mustReadFull(br, num[:1])
                depth = uint8(num[0])
                depth = uint8(num[0])
-               mustReadFull(r, num)
+               mustReadFull(br, num)
                ent := Ent{mtime: int64(binary.BigEndian.Uint64(num))}
                ent := Ent{mtime: int64(binary.BigEndian.Uint64(num))}
-               mustReadFull(r, num)
+               mustReadFull(br, num)
                ent.size = int64(binary.BigEndian.Uint64(num))
                if depth > depthPrev {
                        cols = append(cols, namePrev[:len(namePrev)-1])
                ent.size = int64(binary.BigEndian.Uint64(num))
                if depth > depthPrev {
                        cols = append(cols, namePrev[:len(namePrev)-1])
@@ -48,4 +56,5 @@ func reader(r io.Reader, sink chan Ent) {
                depthPrev = depth
        }
        close(sink)
                depthPrev = depth
        }
        close(sink)
+       comp.Close()
 }
 }
index e12a19c0d7f2103ae9c88e4c4f3c237bb3d03f62819ea98eb33c93210f221256..75b82e57abc5e88fff375e6c1a4a69ae1335b4e2ce944ad401c940ee08e3c8e1 100644 (file)
--- a/writer.go
+++ b/writer.go
@@ -1,9 +1,12 @@
 package main
 
 import (
 package main
 
 import (
+       "bufio"
        "encoding/binary"
        "io"
        "log"
        "encoding/binary"
        "io"
        "log"
+
+       "github.com/klauspost/compress/zstd"
 )
 
 func mustWrite(w io.Writer, buf []byte) {
 )
 
 func mustWrite(w io.Writer, buf []byte) {
@@ -12,7 +15,12 @@ func mustWrite(w io.Writer, buf []byte) {
        }
 }
 
        }
 }
 
-func writer(w io.Writer, sink chan Ent) {
+func writer(dst io.Writer, sink chan Ent) {
+       comp, err := zstd.NewWriter(dst)
+       if err != nil {
+               log.Fatalln(err)
+       }
+       bw := bufio.NewWriterSize(comp, 1<<17)
        num := make([]byte, 8)
        var name string
        for ent := range sink {
        num := make([]byte, 8)
        var name string
        for ent := range sink {
@@ -21,15 +29,21 @@ func writer(w io.Writer, sink chan Ent) {
                        panic("too long")
                }
                binary.BigEndian.PutUint16(num[:2], uint16(len(name)))
                        panic("too long")
                }
                binary.BigEndian.PutUint16(num[:2], uint16(len(name)))
-               mustWrite(w, num[:2])
-               mustWrite(w, []byte(name))
+               mustWrite(bw, num[:2])
+               mustWrite(bw, []byte(name))
                if len(ent.name) >= 1<<8 {
                        panic("too deep")
                }
                if len(ent.name) >= 1<<8 {
                        panic("too deep")
                }
-               mustWrite(w, []byte{byte(len(ent.name) - 1)})
+               mustWrite(bw, []byte{byte(len(ent.name) - 1)})
                binary.BigEndian.PutUint64(num, uint64(ent.mtime))
                binary.BigEndian.PutUint64(num, uint64(ent.mtime))
-               mustWrite(w, num)
+               mustWrite(bw, num)
                binary.BigEndian.PutUint64(num, uint64(ent.size))
                binary.BigEndian.PutUint64(num, uint64(ent.size))
-               mustWrite(w, num)
+               mustWrite(bw, num)
+       }
+       if err = bw.Flush(); err != nil {
+               log.Fatalln(err)
+       }
+       if err = comp.Close(); err != nil {
+               log.Fatalln(err)
        }
 }
        }
 }