Sergey Matveev's repositories - glocate.git/commitdiff
Return zstd back
author: Sergey Matveev <stargrave@stargrave.org>
Mon, 8 Aug 2022 19:31:58 +0000 (22:31 +0300)
committer: Sergey Matveev <stargrave@stargrave.org>
Tue, 9 Aug 2022 13:34:25 +0000 (16:34 +0300)
README
diff.go
dirsizer.go
go.mod
go.sum
index.go
main.go
reader.go
writer.go

diff --git a/README b/README
index 358d5302a98ae9b877e6acf1c950d413e096a065959e58f8d3a631cabf2c1a37..8c52b350ce33c6e2b4f83ae2dd4fc980b342b8d3fa0b79ac2917988101f77477 100644 (file)
--- a/README
+++ b/README
@@ -18,7 +18,7 @@ memory, but that takes 2-3 GiBs of memory, that is huge amount. Moreover
 it fully loads it to perform any basic searches. So current
 implementation uses temporary files and heavy use of data streaming.
 
-Its storage format is trivial:
+Its storage format is simple: Zstandard-compressed list of records:
 
 * 16-bit BE size of the following name
 * entity (file, directory, symbolic link, etc) name itself.
diff --git a/diff.go b/diff.go
index 61ba1ac9f83e9f80cb3ac2a7b81e88079e1e9a2d3a8e8afabd34f0485b739d9f..dda4fd9ded52ee8109d30cbf1974527600de590be43819023d0df4ab8fbb91f4 100644 (file)
--- a/diff.go
+++ b/diff.go
@@ -105,7 +105,7 @@ func updateWithDiff(dbPath, strip string) *os.File {
        mods := make([]*Ent, 0, len(modsNames)+len(rens))
        if len(rens) > 0 {
                sort.Sort(BySrc(rens))
-               go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+               go reader(db, entsReader)
                var ent Ent
                var ok, met bool
                for {
@@ -183,7 +183,7 @@ func updateWithDiff(dbPath, strip string) *os.File {
        entsReader = make(chan Ent, 1<<10)
        entsDirSizer := make(chan Ent, 1<<10)
        entsWriter := make(chan Ent, 1<<10)
-       go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+       go reader(db, entsReader)
 
        dirSizerJob := make(chan struct{})
        var dirSizes []int64
@@ -193,10 +193,9 @@ func updateWithDiff(dbPath, strip string) *os.File {
                close(dirSizerJob)
        }()
 
-       bw := bufio.NewWriterSize(tmp0, 1<<17)
        writerJob := make(chan struct{})
        go func() {
-               writer(bw, entsWriter)
+               writer(tmp0, entsWriter)
                close(writerJob)
        }()
 
@@ -243,9 +242,6 @@ func updateWithDiff(dbPath, strip string) *os.File {
        <-dirSizerJob
        close(entsWriter)
        <-writerJob
-       if err = bw.Flush(); err != nil {
-               log.Fatalln(err)
-       }
 
        tmp1 := applyDirSizes(tmp0, dirSizes)
        tmp0.Close()
index b4f5b4e51c46905addae4a0e5a061a445d23f1a3d6e0e6ce0967c2f36d221a93..fda37f4fe46098cf687690e5e03984aba77f952ed5814359e94deefcccbbbdb3 100644 (file)
@@ -6,6 +6,8 @@ import (
        "io"
        "log"
        "os"
+
+       "github.com/klauspost/compress/zstd"
 )
 
 func dirSizer(dirSizes *[]int64, depth int, sinkBack, sinkIn, sinkOut chan Ent) (curSize int64) {
@@ -51,11 +53,22 @@ func applyDirSizes(src *os.File, dirSizes []int64) *os.File {
                log.Fatalln(err)
        }
 
-       br := bufio.NewReaderSize(src, 1<<17)
+       compR, err := zstd.NewReader(src)
+       if err != nil {
+               log.Fatalln(err)
+       }
+       br := bufio.NewReaderSize(compR, 1<<17)
+
+       compW, err := zstd.NewWriter(tmp,
+               zstd.WithEncoderLevel(zstd.SpeedBestCompression))
+       if err != nil {
+               log.Fatalln(err)
+       }
+       bw := bufio.NewWriterSize(compW, 1<<17)
+
        num := make([]byte, 8)
        var nameLen int
        name := make([]byte, 0, 1<<16)
-       bw := bufio.NewWriterSize(tmp, 1<<17)
        var dirIdx int
        for {
                if _, err = io.ReadFull(br, num[:2]); err != nil {
@@ -67,9 +80,7 @@ func applyDirSizes(src *os.File, dirSizes []int64) *os.File {
                mustWrite(bw, num[:2])
                nameLen = int(binary.BigEndian.Uint16(num[:2]))
                name = name[:nameLen]
-               if _, err = io.ReadFull(br, name); err != nil {
-                       log.Fatalln(err)
-               }
+               mustReadFull(br, name)
                mustWrite(bw, name)
                if _, err = io.CopyN(bw, br, 1+8); err != nil {
                        log.Fatalln(err)
@@ -90,5 +101,9 @@ func applyDirSizes(src *os.File, dirSizes []int64) *os.File {
        if err = bw.Flush(); err != nil {
                log.Fatalln(err)
        }
+       if err = compW.Close(); err != nil {
+               log.Fatalln(err)
+       }
+       compR.Close()
        return tmp
 }
diff --git a/go.mod b/go.mod
index f8654fc71f5b20da772216ab5c99b8f3d5c0d4e6f80285b811eb9ef3a932a85b..7d95a307792df055072c49908b254f1d7104f6872fe699f79df78e2f5f315f2c 100644 (file)
--- a/go.mod
+++ b/go.mod
@@ -3,3 +3,5 @@ module go.stargrave.org/glocate
 go 1.18
 
 require github.com/dustin/go-humanize v1.0.0
+
+require github.com/klauspost/compress v1.15.9
diff --git a/go.sum b/go.sum
index 4f89ea40df43dd72a9ee4bb17b5304a77cd360409cd05cc9431c6210cfbb63c5..568acaac67a8ceacd643b3927d665ea337dccc412d49088ad5449810553ca241 100644 (file)
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,4 @@
 github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
+github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
+github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
index c6fa5398b74fb4ace31a4f09587b8579e3b9642d55040d46e92a1f37fe5e99fb..c05e4c0b84d35c2d6d49beede0faf14c8f985fd98f2d42c1bf11a336f2f49381 100644 (file)
--- a/index.go
+++ b/index.go
@@ -1,7 +1,6 @@
 package main
 
 import (
-       "bufio"
        "fmt"
        "log"
        "os"
@@ -25,10 +24,9 @@ func index() *os.File {
                close(dirSizerJob)
        }()
 
-       bw := bufio.NewWriterSize(tmp0, 1<<17)
        writerJob := make(chan struct{})
        go func() {
-               writer(bw, entsWriter)
+               writer(tmp0, entsWriter)
                close(writerJob)
        }()
 
@@ -45,9 +43,6 @@ func index() *os.File {
        <-dirSizerJob
        close(entsWriter)
        <-writerJob
-       if err = bw.Flush(); err != nil {
-               log.Fatalln(err)
-       }
 
        tmp1 := applyDirSizes(tmp0, dirSizes)
        tmp0.Close()
diff --git a/main.go b/main.go
index a5fcb41451e9410c3be3d629ba89dea6d354c675abe9fd2fa7e0879514b10095..c8615fdd85d4f2b76db67f755348feb0a9baf32c17bfdab0b72473fdb634dd58 100644 (file)
--- a/main.go
+++ b/main.go
@@ -1,7 +1,6 @@
 package main
 
 import (
-       "bufio"
        "flag"
        "log"
        "os"
@@ -64,7 +63,7 @@ func main() {
                log.Fatalln(err)
        }
        entsReader := make(chan Ent, 1<<10)
-       go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+       go reader(db, entsReader)
 
        entsPrinter := make(chan Ent, 1<<10)
        printerJob := make(chan struct{})
index 269fc3295dac975ee17b8a4007c9995da5813562f989a1ff4ce3f92366daa7eb..8900c26a75b3fd91ba83e1d45f9d923436981f3e84a45ea784cb151c474d50b4 100644 (file)
--- a/reader.go
+++ b/reader.go
@@ -1,9 +1,12 @@
 package main
 
 import (
+       "bufio"
        "encoding/binary"
        "io"
        "log"
+
+       "github.com/klauspost/compress/zstd"
 )
 
 func mustReadFull(r io.Reader, buf []byte) {
@@ -12,15 +15,20 @@ func mustReadFull(r io.Reader, buf []byte) {
        }
 }
 
-func reader(r io.Reader, sink chan Ent) {
-       var err error
+func reader(src io.Reader, sink chan Ent) {
+       comp, err := zstd.NewReader(src)
+       if err != nil {
+               log.Fatalln(err)
+       }
+       br := bufio.NewReaderSize(comp, 1<<17)
+
        num := make([]byte, 8)
        var cols []string
        var namePrev string
        var nameLen uint16
        var depth, depthPrev uint8
        for {
-               _, err = io.ReadFull(r, num[:2])
+               _, err = io.ReadFull(br, num[:2])
                if err != nil {
                        if err == io.EOF {
                                break
@@ -29,13 +37,13 @@ func reader(r io.Reader, sink chan Ent) {
                }
                nameLen = binary.BigEndian.Uint16(num[:2])
                nameRaw := make([]byte, nameLen)
-               mustReadFull(r, nameRaw)
+               mustReadFull(br, nameRaw)
                name := string(nameRaw)
-               mustReadFull(r, num[:1])
+               mustReadFull(br, num[:1])
                depth = uint8(num[0])
-               mustReadFull(r, num)
+               mustReadFull(br, num)
                ent := Ent{mtime: int64(binary.BigEndian.Uint64(num))}
-               mustReadFull(r, num)
+               mustReadFull(br, num)
                ent.size = int64(binary.BigEndian.Uint64(num))
                if depth > depthPrev {
                        cols = append(cols, namePrev[:len(namePrev)-1])
@@ -48,4 +56,5 @@ func reader(r io.Reader, sink chan Ent) {
                depthPrev = depth
        }
        close(sink)
+       comp.Close()
 }
index e12a19c0d7f2103ae9c88e4c4f3c237bb3d03f62819ea98eb33c93210f221256..75b82e57abc5e88fff375e6c1a4a69ae1335b4e2ce944ad401c940ee08e3c8e1 100644 (file)
--- a/writer.go
+++ b/writer.go
@@ -1,9 +1,12 @@
 package main
 
 import (
+       "bufio"
        "encoding/binary"
        "io"
        "log"
+
+       "github.com/klauspost/compress/zstd"
 )
 
 func mustWrite(w io.Writer, buf []byte) {
@@ -12,7 +15,12 @@ func mustWrite(w io.Writer, buf []byte) {
        }
 }
 
-func writer(w io.Writer, sink chan Ent) {
+func writer(dst io.Writer, sink chan Ent) {
+       comp, err := zstd.NewWriter(dst)
+       if err != nil {
+               log.Fatalln(err)
+       }
+       bw := bufio.NewWriterSize(comp, 1<<17)
        num := make([]byte, 8)
        var name string
        for ent := range sink {
@@ -21,15 +29,21 @@ func writer(w io.Writer, sink chan Ent) {
                        panic("too long")
                }
                binary.BigEndian.PutUint16(num[:2], uint16(len(name)))
-               mustWrite(w, num[:2])
-               mustWrite(w, []byte(name))
+               mustWrite(bw, num[:2])
+               mustWrite(bw, []byte(name))
                if len(ent.name) >= 1<<8 {
                        panic("too deep")
                }
-               mustWrite(w, []byte{byte(len(ent.name) - 1)})
+               mustWrite(bw, []byte{byte(len(ent.name) - 1)})
                binary.BigEndian.PutUint64(num, uint64(ent.mtime))
-               mustWrite(w, num)
+               mustWrite(bw, num)
                binary.BigEndian.PutUint64(num, uint64(ent.size))
-               mustWrite(w, num)
+               mustWrite(bw, num)
+       }
+       if err = bw.Flush(); err != nil {
+               log.Fatalln(err)
+       }
+       if err = comp.Close(); err != nil {
+               log.Fatalln(err)
        }
 }