it fully loads it to perform any basic searches. So current
implementation uses temporary files and heavy use of data streaming.
-Its storage format is trivial:
+Its storage format is simple: a Zstandard-compressed list of records:
* 16-bit BE size of the following name
* entity (file, directory, symbolic link, etc) name itself.
mods := make([]*Ent, 0, len(modsNames)+len(rens))
if len(rens) > 0 {
sort.Sort(BySrc(rens))
- go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+ go reader(db, entsReader)
var ent Ent
var ok, met bool
for {
entsReader = make(chan Ent, 1<<10)
entsDirSizer := make(chan Ent, 1<<10)
entsWriter := make(chan Ent, 1<<10)
- go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+ go reader(db, entsReader)
dirSizerJob := make(chan struct{})
var dirSizes []int64
close(dirSizerJob)
}()
- bw := bufio.NewWriterSize(tmp0, 1<<17)
writerJob := make(chan struct{})
go func() {
- writer(bw, entsWriter)
+ writer(tmp0, entsWriter)
close(writerJob)
}()
<-dirSizerJob
close(entsWriter)
<-writerJob
- if err = bw.Flush(); err != nil {
- log.Fatalln(err)
- }
tmp1 := applyDirSizes(tmp0, dirSizes)
tmp0.Close()
"io"
"log"
"os"
+
+ "github.com/klauspost/compress/zstd"
)
func dirSizer(dirSizes *[]int64, depth int, sinkBack, sinkIn, sinkOut chan Ent) (curSize int64) {
log.Fatalln(err)
}
- br := bufio.NewReaderSize(src, 1<<17)
+ compR, err := zstd.NewReader(src)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ br := bufio.NewReaderSize(compR, 1<<17)
+
+ compW, err := zstd.NewWriter(tmp,
+ zstd.WithEncoderLevel(zstd.SpeedBestCompression))
+ if err != nil {
+ log.Fatalln(err)
+ }
+ bw := bufio.NewWriterSize(compW, 1<<17)
+
num := make([]byte, 8)
var nameLen int
name := make([]byte, 0, 1<<16)
- bw := bufio.NewWriterSize(tmp, 1<<17)
var dirIdx int
for {
if _, err = io.ReadFull(br, num[:2]); err != nil {
mustWrite(bw, num[:2])
nameLen = int(binary.BigEndian.Uint16(num[:2]))
name = name[:nameLen]
- if _, err = io.ReadFull(br, name); err != nil {
- log.Fatalln(err)
- }
+ mustReadFull(br, name)
mustWrite(bw, name)
if _, err = io.CopyN(bw, br, 1+8); err != nil {
log.Fatalln(err)
if err = bw.Flush(); err != nil {
log.Fatalln(err)
}
+ if err = compW.Close(); err != nil {
+ log.Fatalln(err)
+ }
+ compR.Close()
return tmp
}
go 1.18
require github.com/dustin/go-humanize v1.0.0
+
+require github.com/klauspost/compress v1.15.9
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
+github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
+github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
package main
import (
- "bufio"
"fmt"
"log"
"os"
close(dirSizerJob)
}()
- bw := bufio.NewWriterSize(tmp0, 1<<17)
writerJob := make(chan struct{})
go func() {
- writer(bw, entsWriter)
+ writer(tmp0, entsWriter)
close(writerJob)
}()
<-dirSizerJob
close(entsWriter)
<-writerJob
- if err = bw.Flush(); err != nil {
- log.Fatalln(err)
- }
tmp1 := applyDirSizes(tmp0, dirSizes)
tmp0.Close()
package main
import (
- "bufio"
"flag"
"log"
"os"
log.Fatalln(err)
}
entsReader := make(chan Ent, 1<<10)
- go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+ go reader(db, entsReader)
entsPrinter := make(chan Ent, 1<<10)
printerJob := make(chan struct{})
package main
import (
+ "bufio"
"encoding/binary"
"io"
"log"
+
+ "github.com/klauspost/compress/zstd"
)
func mustReadFull(r io.Reader, buf []byte) {
}
}
-func reader(r io.Reader, sink chan Ent) {
- var err error
+func reader(src io.Reader, sink chan Ent) {
+ comp, err := zstd.NewReader(src)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ br := bufio.NewReaderSize(comp, 1<<17)
+
num := make([]byte, 8)
var cols []string
var namePrev string
var nameLen uint16
var depth, depthPrev uint8
for {
- _, err = io.ReadFull(r, num[:2])
+ _, err = io.ReadFull(br, num[:2])
if err != nil {
if err == io.EOF {
break
}
nameLen = binary.BigEndian.Uint16(num[:2])
nameRaw := make([]byte, nameLen)
- mustReadFull(r, nameRaw)
+ mustReadFull(br, nameRaw)
name := string(nameRaw)
- mustReadFull(r, num[:1])
+ mustReadFull(br, num[:1])
depth = uint8(num[0])
- mustReadFull(r, num)
+ mustReadFull(br, num)
ent := Ent{mtime: int64(binary.BigEndian.Uint64(num))}
- mustReadFull(r, num)
+ mustReadFull(br, num)
ent.size = int64(binary.BigEndian.Uint64(num))
if depth > depthPrev {
cols = append(cols, namePrev[:len(namePrev)-1])
depthPrev = depth
}
close(sink)
+ comp.Close()
}
package main
import (
+ "bufio"
"encoding/binary"
"io"
"log"
+
+ "github.com/klauspost/compress/zstd"
)
func mustWrite(w io.Writer, buf []byte) {
}
}
-func writer(w io.Writer, sink chan Ent) {
+func writer(dst io.Writer, sink chan Ent) {
+ comp, err := zstd.NewWriter(dst)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ bw := bufio.NewWriterSize(comp, 1<<17)
num := make([]byte, 8)
var name string
for ent := range sink {
panic("too long")
}
binary.BigEndian.PutUint16(num[:2], uint16(len(name)))
- mustWrite(w, num[:2])
- mustWrite(w, []byte(name))
+ mustWrite(bw, num[:2])
+ mustWrite(bw, []byte(name))
if len(ent.name) >= 1<<8 {
panic("too deep")
}
- mustWrite(w, []byte{byte(len(ent.name) - 1)})
+ mustWrite(bw, []byte{byte(len(ent.name) - 1)})
binary.BigEndian.PutUint64(num, uint64(ent.mtime))
- mustWrite(w, num)
+ mustWrite(bw, num)
binary.BigEndian.PutUint64(num, uint64(ent.size))
- mustWrite(w, num)
+ mustWrite(bw, num)
+ }
+ if err = bw.Flush(); err != nil {
+ log.Fatalln(err)
+ }
+ if err = comp.Close(); err != nil {
+ log.Fatalln(err)
}
}