glocate -- ZFS-diff-friendly locate-like utility
+This utility maintains a database of the filesystem hierarchy and can
+quickly display parts of it, like ordinary *locate utilities do. But
+unlike the others, it is able to consume zfs-diff's output and apply
+those changes to an existing database.
+
+Why did I write it? Indexing, even a plain "find /big", can take a
+considerable amount of time, an hour or so, with many I/O operations
+spent. But my home NAS sees relatively few changes every day. The only
+quick way to determine what exactly was modified is to traverse ZFS's
+Merkle trees to find the difference between snapshots. Fortunately the
+zfs-diff command does exactly that, producing pretty machine-friendly
+output.
+
+Why is this utility so complicated? Initially it kept the whole
+database in memory, but that took 2-3 GiB of memory, which is a huge
+amount. Moreover it had to fully load the database to perform even
+basic searches. So the current implementation uses temporary files and
+makes heavy use of data streaming.
+
+Its storage format is trivial:
+
+* 16-bit BE length of the following name
+* the entity's (file, directory, symbolic link, etc) name itself;
+  directories have a trailing "/"
+* single byte indicating the entry's depth in the hierarchy
+* 64-bit BE mtime seconds
+* 64-bit BE size: the file's size, or for a directory the sum of the
+  sizes of everything beneath it
+
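+For illustration, here is a minimal sketch of how a single record could
+be serialized (encodeRecord is a hypothetical helper; the real
+serialization lives in the writer function below):
+
+    package main
+
+    import (
+        "bytes"
+        "encoding/binary"
+    )
+
+    // encodeRecord lays out one record: 16-bit BE name length, the name
+    // itself, a depth byte, 64-bit BE mtime and 64-bit BE size.
+    func encodeRecord(name string, depth uint8, mtime, size int64) []byte {
+        var buf bytes.Buffer
+        num := make([]byte, 8)
+        binary.BigEndian.PutUint16(num[:2], uint16(len(name)))
+        buf.Write(num[:2])
+        buf.WriteString(name) // directories keep their trailing "/"
+        buf.WriteByte(depth)
+        binary.BigEndian.PutUint64(num, uint64(mtime))
+        buf.Write(num)
+        binary.BigEndian.PutUint64(num, uint64(size))
+        buf.Write(num)
+        return buf.Bytes()
+    }
+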
+The indexing algorithm is as follows (a sketch follows the list):
+
+* traverse the whole filesystem hierarchy in *sorted* order. All
+  records are written to a temporary file without directory sizes,
+  because those are not known in advance during the walk
+* during the walk, remember each directory's total size in memory
+* read all records back from that temporary file, writing them to
+  another one, but replacing the directory sizes with the remembered
+  ones
+
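+The remembered directory sizes are consumed in pre-order: the same
+order in which directory records appear in the stream. A simplified
+sequential sketch of that idea (dirSize is hypothetical; the real
+dirSizer below does the same over a channel stream, without walking the
+filesystem twice):
+
+    package main
+
+    import (
+        "os"
+        "path/filepath"
+    )
+
+    // dirSize records every directory's total size in *sizes in
+    // pre-order, so a second pass can consume the slice sequentially.
+    func dirSize(root string, sizes *[]int64) (total int64, err error) {
+        idx := len(*sizes) // reserve this directory's slot before descending
+        *sizes = append(*sizes, 0)
+        ents, err := os.ReadDir(root)
+        if err != nil {
+            return 0, err
+        }
+        for _, ent := range ents {
+            if ent.IsDir() {
+                sub, err := dirSize(filepath.Join(root, ent.Name()), sizes)
+                if err != nil {
+                    return 0, err
+                }
+                total += sub
+            } else if info, err := ent.Info(); err == nil && info.Mode().IsRegular() {
+                total += info.Size()
+            }
+        }
+        (*sizes)[idx] = total
+        return total, nil
+    }
+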
+Searching is trivial:
+
+* there is no actual searching, just a sequential stream through the
+  whole database file
+* if some root is specified, then the program outputs only the entries
+  under that hierarchy path, exiting as soon as it is finished
+
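+Because the records are sorted, all entries under a root form one
+contiguous run, so the scan can stop at the first non-matching record
+after the root has been met. A sketch of that loop (scan is
+hypothetical; main.go below does the same over database records):
+
+    package main
+
+    import "strings"
+
+    // scan prints only the paths under root, stopping right after the
+    // contiguous sorted run of matching entries has passed by.
+    func scan(paths []string, root string) {
+        rootMet := false
+        for _, p := range paths {
+            if strings.HasPrefix(p, root) {
+                println(p) // a stand-in for the real printers
+                rootMet = true
+            } else if rootMet {
+                break // sorted order: nothing under root can follow
+            }
+        }
+    }
+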
+The updating algorithm is as follows (a sketch follows the list):
+
+* read all [-+MR] actions from zfs-diff, validating the whole format
+* each file's "R" becomes a "-" and a "+" action
+* if there are directory "R"s, then collect them and stream the current
+  database to determine each path entity that has to be "-"ed and "+"ed
+* each "+" also adds an entry to the list of "M"s
+* sort all "-", "+" and "M" filenames in ascending order
+* get the entity's information (remembering its size and mtime) for
+  each "M"
+* stream the current database records, writing them to a temporary file
+* if a record exists in the "-"-list, then skip it
+* if any "+" exists in the *sorted* list that precedes the record read
+  from the database, then insert it into the stream, taking the size
+  and mtime information from the "M"-list
+* if any "M" exists for the read record, then use it to alter the
+  record
+* all that time the directory size calculating algorithm also runs, the
+  same one used during indexing
+* create another temporary file to copy the records into, with
+  actualized directory sizes
+
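+The core of the update is a merge of the sorted database stream with
+the sorted "-", "+" and "M" lists. A simplified sketch over plain
+strings (merge is hypothetical and uses a map for the "M"s, while the
+real update code below keeps them as a sorted slice of records):
+
+    package main
+
+    // merge streams sorted database names, dropping "-" entries,
+    // splicing "+" entries in at their sorted position and applying
+    // "M" replacements.
+    func merge(db, dels, adds []string, mods map[string]string) (out []string) {
+        for _, name := range db {
+            if len(dels) > 0 && dels[0] == name {
+                dels = dels[1:] // "-": skip this record
+                continue
+            }
+            for len(adds) > 0 && adds[0] < name {
+                out = append(out, mods[adds[0]]) // "+": insert before it
+                adds = adds[1:]
+            }
+            if m, ok := mods[name]; ok {
+                name = m // "M": alter the record
+            }
+            out = append(out, name)
+        }
+        for _, a := range adds { // "+"s sorting after the last record
+            out = append(out, mods[a])
+        }
+        return out
+    }
+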
+How to use it?
+
+ $ zfs snap big@snap1
+ $ cd /big ; glocate -db /tmp/glocate.db -index
+
+ $ glocate -db /tmp/glocate.db
+ [list of all files]
+
+ $ glocate -db /tmp/glocate.db -machine
+    [machine-parseable list of files with sizes and mtimes]
+
+ $ glocate -db /tmp/glocate.db -tree
+    [pretty tree-like list of files with sizes and mtimes]
+
+ $ glocate -db /tmp/glocate.db some/sub/path
+ [just a part of the whole hierarchy]
+
+and update it afterwards:
+
+ $ zfs snap big@snap2
+    $ zfs diff -FH big@snap1 big@snap2 | glocate -db /tmp/glocate.db -strip /big/ -update
+
glocate is copylefted free software: see the file COPYING for copying
conditions.
--- /dev/null
+package main
+
+import (
+ "bufio"
+ "io"
+ "log"
+ "os"
+ "sort"
+ "strings"
+)
+
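+// Ren is a directory rename taken from a zfs-diff "R" action, with the
+// source and destination paths split into components.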
+type Ren struct {
+ src []string
+ dst []string
+}
+
+type BySrc []Ren
+
+func (a BySrc) Len() int {
+ return len(a)
+}
+
+func (a BySrc) Swap(i, j int) {
+ a[i], a[j] = a[j], a[i]
+}
+
+func (a BySrc) Less(i, j int) bool {
+ return namesCmp(a[i].src, a[j].src) < 0
+}
+
+type EntByName []*Ent
+
+func (a EntByName) Len() int {
+ return len(a)
+}
+
+func (a EntByName) Swap(i, j int) {
+ a[i], a[j] = a[j], a[i]
+}
+
+func (a EntByName) Less(i, j int) bool {
+ return namesCmp(a[i].name, a[j].name) < 0
+}
+
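+// updateWithDiff reads zfs-diff output from stdin and merges it into
+// the database at dbPath, returning a temporary file with the updated
+// records and freshly calculated directory sizes.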
+func updateWithDiff(dbPath, strip string) *os.File {
+ scanner := bufio.NewScanner(os.Stdin)
+ var t string
+ var delsNames []string
+ var addsNames []string
+ var modsNames []string
+ var rens []Ren
+ var isDir bool
+ for scanner.Scan() {
+ t = scanner.Text()
+ if len(t) == 0 {
+ continue
+ }
+ cols := strings.Split(t, "\t")
+ if len(cols) < 3 {
+ log.Fatalln("bad zfs-diff format")
+ }
+ isDir = cols[1] == "/"
+ name := deoctalize(strings.TrimPrefix(cols[2], strip))
+ if name == "" {
+ continue
+ }
+ name = "./" + name
+ if isDir {
+ name += "/"
+ }
+ switch cols[0] {
+ case "-":
+ delsNames = append(delsNames, name)
+ case "+":
+ addsNames = append(addsNames, name)
+ case "M":
+ modsNames = append(modsNames, name)
+ case "R":
+ if len(cols) != 4 {
+ log.Fatalln("bad zfs-diff format for R")
+ }
+ dst := "./" + deoctalize(strings.TrimPrefix(cols[3], strip))
+ if isDir {
+ dst += "/"
+ rens = append(rens, Ren{
+ src: nameSplit(name),
+ dst: nameSplit(dst),
+ })
+ } else {
+ delsNames = append(delsNames, name)
+ addsNames = append(addsNames, dst)
+ }
+ default:
+ log.Fatalln("bad zfs-diff format")
+ }
+ }
+
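+	// Directory renames require streaming the current database to turn
+	// every entry under a renamed directory into "-" and "+" actions.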
+ entsReader := make(chan Ent, 1<<10)
+ db, err := os.Open(dbPath)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ dels := make([][]string, 0, len(delsNames)+len(rens))
+ adds := make([][]string, 0, len(addsNames)+len(rens))
+ mods := make([]*Ent, 0, len(modsNames)+len(rens))
+ if len(rens) > 0 {
+ sort.Sort(BySrc(rens))
+ go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+ var ent Ent
+ var ok, met bool
+ for {
+ ent, ok = <-entsReader
+ if !ok {
+ break
+ }
+ Retry:
+ if len(rens) > 0 {
+ if hasPrefix(ent.name, rens[0].src) {
+ dels = append(dels, ent.name)
+ dst := append(
+ append([]string{}, rens[0].dst...),
+ ent.name[len(rens[0].src):]...,
+ )
+ adds = append(adds, dst)
+ mods = append(mods, &Ent{name: dst})
+ if !met {
+ // strip "/" from prefix directory
+ dst := rens[0].dst
+ last := dst[len(dst)-1]
+ dst[len(dst)-1] = last[:len(last)-1]
+ met = true
+ }
+ } else if met {
+ met = false
+ rens = rens[1:]
+ goto Retry
+ }
+ }
+ }
+ rens = nil
+ }
+
+ for _, name := range delsNames {
+ dels = append(dels, nameSplit(name))
+ }
+ delsNames = nil
+ sort.Sort(ByName(dels))
+
+ for _, name := range addsNames {
+ adds = append(adds, nameSplit(name))
+ modsNames = append(modsNames, name)
+ }
+ addsNames = nil
+ sort.Sort(ByName(adds))
+
+ for _, name := range modsNames {
+ mods = append(mods, &Ent{name: nameSplit(name)})
+ }
+ modsNames = nil
+ sort.Sort(EntByName(mods))
+ var info os.FileInfo
+ for _, ent := range mods {
+ info, err = os.Stat(nameJoin(ent.name))
+ if err != nil {
+ log.Println("can not stat:", nameJoin(ent.name), ":", err)
+ continue
+ }
+ if info.Mode().IsRegular() {
+ ent.size = info.Size()
+ }
+ ent.mtime = info.ModTime().Unix()
+ }
+
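+	// Stream the whole database, merging in the sorted "-", "+" and
+	// "M" lists while recalculating directory sizes along the way.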
+ _, err = db.Seek(0, io.SeekStart)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ tmp0, err := os.CreateTemp("", "glocate-idx")
+ if err != nil {
+ log.Fatalln(err)
+ }
+ defer os.Remove(tmp0.Name())
+ entsReader = make(chan Ent, 1<<10)
+ entsDirSizer := make(chan Ent, 1<<10)
+ entsWriter := make(chan Ent, 1<<10)
+ go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
+
+ dirSizerJob := make(chan struct{})
+ var dirSizes []int64
+ sinkBack := make(chan Ent, 1)
+ go func() {
+ dirSizer(&dirSizes, 1, sinkBack, entsDirSizer, entsWriter)
+ close(dirSizerJob)
+ }()
+
+ bw := bufio.NewWriterSize(tmp0, 1<<17)
+ writerJob := make(chan struct{})
+ go func() {
+ writer(bw, entsWriter)
+ close(writerJob)
+ }()
+
+ for ent := range entsReader {
+ if len(dels) > 0 && namesCmp(ent.name, dels[0]) == 0 {
+ dels = dels[1:]
+ continue
+ }
+ for len(adds) > 0 && namesCmp(adds[0], ent.name) < 0 {
+ if namesCmp(mods[0].name, adds[0]) != 0 {
+ panic("+ and M lists are out of sync")
+ }
+ newEnt := Ent{
+ name: adds[0],
+ mtime: mods[0].mtime,
+ size: mods[0].size,
+ }
+ entsDirSizer <- newEnt
+ adds = adds[1:]
+ mods = mods[1:]
+ }
+ if len(mods) > 0 && namesCmp(ent.name, mods[0].name) == 0 {
+ ent.mtime = mods[0].mtime
+ ent.size = mods[0].size
+ mods = mods[1:]
+ }
+ entsDirSizer <- ent
+ }
+ for len(adds) > 0 {
+ if namesCmp(mods[0].name, adds[0]) != 0 {
+ panic("+ and M lists are out of sync")
+ }
+ newEnt := Ent{
+ name: adds[0],
+ mtime: mods[0].mtime,
+ size: mods[0].size,
+ }
+ entsDirSizer <- newEnt
+ adds = adds[1:]
+ mods = mods[1:]
+ }
+
+ close(entsDirSizer)
+ <-dirSizerJob
+ close(entsWriter)
+ <-writerJob
+ if err = bw.Flush(); err != nil {
+ log.Fatalln(err)
+ }
+
+ tmp1 := applyDirSizes(tmp0, dirSizes)
+ tmp0.Close()
+ os.Remove(tmp0.Name())
+ return tmp1
+}
--- /dev/null
+package main
+
+import (
+ "bufio"
+ "encoding/binary"
+ "io"
+ "log"
+ "os"
+)
+
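+// dirSizer consumes a depth-ordered stream of entries, recursing once
+// per directory and recording every directory's total size in
+// *dirSizes in the same pre-order the directories appear in the
+// stream. An entry shallower than the current depth is pushed back
+// through sinkBack for the caller to take over.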
+func dirSizer(dirSizes *[]int64, depth int, sinkBack, sinkIn, sinkOut chan Ent) (curSize int64) {
+ var ent Ent
+ var opened bool
+ var dirIdx int
+ for {
+ select {
+ case ent = <-sinkBack:
+ goto Got
+ default:
+ }
+ ent, opened = <-sinkIn
+ if !opened {
+ break
+ }
+ Got:
+ if len(ent.name) < depth {
+ sinkBack <- ent
+ return
+ }
+ sinkOut <- ent
+ curSize += ent.size
+ if !ent.IsDir() {
+ continue
+ }
+ dirIdx = len(*dirSizes)
+ (*dirSizes) = append(*dirSizes, 0)
+ dirSize := dirSizer(dirSizes, depth+1, sinkBack, sinkIn, sinkOut)
+ (*dirSizes)[dirIdx] = dirSize
+ curSize += dirSize
+ }
+ return
+}
+
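+// applyDirSizes copies the records from src into a new temporary file,
+// replacing each directory's size field with the corresponding value
+// collected by dirSizer.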
+func applyDirSizes(src *os.File, dirSizes []int64) *os.File {
+ _, err := src.Seek(0, io.SeekStart)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ tmp, err := os.CreateTemp("", "glocate-idx")
+ if err != nil {
+ log.Fatalln(err)
+ }
+
+ br := bufio.NewReaderSize(src, 1<<17)
+ num := make([]byte, 8)
+ var nameLen int
+ name := make([]byte, 0, 1<<16)
+ bw := bufio.NewWriterSize(tmp, 1<<17)
+ var dirIdx int
+ for {
+ if _, err = io.ReadFull(br, num[:2]); err != nil {
+ if err == io.EOF {
+ break
+ }
+ log.Fatalln(err)
+ }
+ mustWrite(bw, num[:2])
+ nameLen = int(binary.BigEndian.Uint16(num[:2]))
+ name = name[:nameLen]
+ if _, err = io.ReadFull(br, name); err != nil {
+ log.Fatalln(err)
+ }
+ mustWrite(bw, name)
+ if _, err = io.CopyN(bw, br, 1+8); err != nil {
+ log.Fatalln(err)
+ }
+ if name[len(name)-1] == byte('/') {
+ if _, err = br.Discard(8); err != nil {
+ log.Fatalln(err)
+ }
+ binary.BigEndian.PutUint64(num, uint64(dirSizes[dirIdx]))
+ mustWrite(bw, num)
+ dirIdx++
+ } else {
+ if _, err = io.CopyN(bw, br, 8); err != nil {
+ log.Fatalln(err)
+ }
+ }
+ }
+ if err = bw.Flush(); err != nil {
+ log.Fatalln(err)
+ }
+ return tmp
+}
go 1.18
-require (
- github.com/dustin/go-humanize v1.0.0
- github.com/klauspost/compress v1.15.8
-)
+require github.com/dustin/go-humanize v1.0.0
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
-github.com/klauspost/compress v1.15.8 h1:JahtItbkWjf2jzm/T+qgMxkP9EMHsqEUA6vCMGmXvhA=
-github.com/klauspost/compress v1.15.8/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
--- /dev/null
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "log"
+ "os"
+)
+
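+// index walks the current directory and produces a temporary file
+// holding the complete database.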
+func index() *os.File {
+ tmp0, err := os.CreateTemp("", "glocate-idx")
+ if err != nil {
+ log.Fatalln(err)
+ }
+ defer os.Remove(tmp0.Name())
+
+ entsWalker := make(chan Ent, 1<<10)
+ entsWriter := make(chan Ent, 1<<10)
+ dirSizerJob := make(chan struct{})
+ var dirSizes []int64
+ entsWalker <- Ent{name: []string{"./"}}
+ sinkBack := make(chan Ent, 1)
+ go func() {
+ dirSizer(&dirSizes, 1, sinkBack, entsWalker, entsWriter)
+ close(dirSizerJob)
+ }()
+
+ bw := bufio.NewWriterSize(tmp0, 1<<17)
+ writerJob := make(chan struct{})
+ go func() {
+ writer(bw, entsWriter)
+ close(writerJob)
+ }()
+
+ walkerStatusStop := make(chan struct{})
+ go walkerStatus(walkerStatusStop)
+ err = walker(entsWalker, []string{"./"})
+ walkerStatusStop <- struct{}{}
+ <-walkerStatusStop
+ fmt.Print("\r")
+ if err != nil {
+ log.Fatalln(err)
+ }
+ close(entsWalker)
+ <-dirSizerJob
+ close(entsWriter)
+ <-writerJob
+ if err = bw.Flush(); err != nil {
+ log.Fatalln(err)
+ }
+
+ tmp1 := applyDirSizes(tmp0, dirSizes)
+ tmp0.Close()
+ return tmp1
+}
-/*
-glocate -- ZFS-diff-friendly locate-like utility
-Copyright (C) 2022 Sergey Matveev <stargrave@stargrave.org>
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, version 3 of the License.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
package main
import (
"bufio"
- "encoding/gob"
"flag"
- "fmt"
- "io"
- "io/fs"
"log"
"os"
- "path"
- "sort"
- "strconv"
"strings"
"syscall"
- "time"
-
- "github.com/dustin/go-humanize"
- "github.com/klauspost/compress/zstd"
)
-type File struct {
- Name string
- Size uint64
- Mtime int64
- Files []File
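+// Ent is a single database entry: the component-split name (the last
+// component keeps a trailing "/" for directories), mtime and size.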
+type Ent struct {
+ name []string
+ mtime int64
+ size int64
}
-type ByName []File
-
-func (a ByName) Len() int {
- return len(a)
-}
-
-func (a ByName) Swap(i, j int) {
- a[i], a[j] = a[j], a[i]
-}
-
-func (a ByName) Less(i, j int) bool {
- return a[i].Name < a[j].Name
-}
-
-func (file *File) IsDir() bool {
- return file.Name[len(file.Name)-1] == '/'
-}
-
-func walk(root string) ([]File, uint64, error) {
- fd, err := os.Open(root)
- if err != nil {
- return nil, 0, err
- }
- var files []File
- var size uint64
- var info fs.FileInfo
- for {
- ents, err := fd.ReadDir(1 << 10)
- if err != nil {
- if err == io.EOF {
- break
- }
- fd.Close()
- return nil, 0, err
- }
- for _, ent := range ents {
- file := File{Name: ent.Name()}
- fullPath := path.Join(root, file.Name)
- if ent.IsDir() {
- file.Name += "/"
- }
- info, err = ent.Info()
- if err != nil {
- log.Println("can not stat:", fullPath, ":", err)
- files = append(files, file)
- continue
- }
- file.Mtime = info.ModTime().Unix()
- if ent.IsDir() {
- file.Files, file.Size, err = walk(fullPath)
- if err != nil {
- log.Println("can not walk:", fullPath, ":", err)
- files = append(files, file)
- continue
- }
- } else if info.Mode().IsRegular() {
- file.Size = uint64(info.Size())
- }
- files = append(files, file)
- size += file.Size
- }
- }
- fd.Close()
- sort.Sort(ByName(files))
- return files, size, nil
-}
-
-func usage() {
- log.Println("usage")
- os.Exit(1)
-}
-
-func load(dbPath string) *File {
- fd, err := os.Open(dbPath)
- if err != nil {
- log.Fatalln(err)
- }
- defer fd.Close()
- comp, err := zstd.NewReader(fd)
- if err != nil {
- log.Fatalln(err)
- }
- dec := gob.NewDecoder(comp)
- var file File
- err = dec.Decode(&file)
- if err != nil {
- log.Fatalln(err)
- }
- comp.Close()
- return &file
+func (ent *Ent) IsDir() bool {
+ return IsDir(ent.name[len(ent.name)-1])
}
-func (db *File) dump(dbPath string) error {
- tmp, err := os.CreateTemp(path.Dir(dbPath), "glocate")
- if err != nil {
- return err
- }
- defer os.Remove(tmp.Name())
- comp, err := zstd.NewWriter(
- tmp, zstd.WithEncoderLevel(zstd.SpeedBestCompression),
- )
- if err != nil {
- return err
- }
- enc := gob.NewEncoder(comp)
- err = enc.Encode(db)
- if err != nil {
- return err
- }
- err = comp.Close()
- if err != nil {
- return err
- }
- err = tmp.Close()
- if err != nil {
- return err
- }
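+// dbCommit atomically replaces the database at dbPath with tmp,
+// honouring the process umask for the new file's permissions.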
+func dbCommit(dbPath string, tmp *os.File) {
umask := syscall.Umask(0)
syscall.Umask(umask)
- err = os.Chmod(tmp.Name(), os.FileMode(0666&^umask))
- if err != nil {
- return err
- }
- return os.Rename(tmp.Name(), dbPath)
-}
-
-func (file *File) listBeauty(indent string, n int, isLast, veryFirst bool) {
- if veryFirst {
- fmt.Printf("[%s]\n", humanize.IBytes(file.Size))
- } else {
- var box string
- if isLast {
- box = "└"
- } else {
- box = "├"
- }
- name := file.Name
- fmt.Printf("%s%s %s\t№%d [%s] %s\n",
- indent, box, name, n, humanize.IBytes(file.Size),
- time.Unix(file.Mtime, 0).Format("2006-01-02"),
- )
- if isLast {
- indent += " "
- } else {
- indent += "│ "
- }
- }
- for n, f := range file.Files {
- n++
- f.listBeauty(indent, n, n == len(file.Files), false)
- }
-}
-
-func (file *File) listSimple(root string, veryFirst bool) {
- name := file.Name
- fmt.Println(
- strconv.FormatUint(file.Size, 10),
- time.Unix(file.Mtime, 0).Format("2006-01-02T15:04:05"),
- root+name,
- )
- if veryFirst {
- name = ""
- }
- for _, f := range file.Files {
- f.listSimple(root+name, false)
- }
-}
-
-func (file *File) listFiles(root string, veryFirst bool) {
- name := file.Name
- if veryFirst {
- root = ""
- } else {
- fmt.Println(root + name)
- root += name
- }
- for _, f := range file.Files {
- f.listFiles(root, false)
- }
-}
-
-func (db *File) find(p string) (file *File, parents []*File, idx int, err error) {
- file = db
- var f File
-Entities:
- for _, ent := range strings.Split(p, "/") {
- for idx, f = range file.Files {
- if (ent == f.Name) || (ent+"/" == f.Name) {
- parents = append(parents, file)
- file = &f
- continue Entities
- }
- }
- err = fmt.Errorf("no entity found: %s", ent)
- return
- }
- return
-}
-
-func (db *File) remove(p string) error {
- file, parents, idx, err := db.find(p)
- if err != nil {
- return err
- }
- lastParent := parents[len(parents)-1]
- lastParent.Files = append(
- lastParent.Files[:idx],
- lastParent.Files[idx+1:]...,
- )
- for _, parent := range parents {
- parent.Size -= file.Size
- }
- return nil
-}
-
-func (db *File) add(p string) error {
- cols := strings.Split(p, "/")
- cols, name := cols[:len(cols)-1], cols[len(cols)-1]
- var parent *File
- var err error
- if len(cols) != 0 {
- parent, _, _, err = db.find(path.Join(cols...))
- if err != nil {
- return err
- }
- } else {
- parent = db
- }
- info, err := os.Stat(p)
- if err != nil {
- return err
- }
- if info.IsDir() {
- name += "/"
- }
- file := File{
- Name: name,
- Size: uint64(info.Size()),
- Mtime: info.ModTime().Unix(),
+ if err := os.Chmod(tmp.Name(), os.FileMode(0666&^umask)); err != nil {
+ log.Fatalln(err)
}
- parent.Files = append(parent.Files, file)
- sort.Sort(ByName(parent.Files))
- parent.Size += file.Size
- return nil
-}
-
-func deoctalize(s string) string {
- chars := make([]byte, 0, len(s))
- for i := 0; i < len(s); i++ {
- if s[i] == '\\' {
- b, err := strconv.ParseUint("0"+s[i+1:i+1+3], 0, 8)
- if err != nil {
- log.Fatalln(err)
- }
- chars = append(chars, byte(b))
- i += 3
- } else {
- chars = append(chars, s[i])
- }
+ if err := os.Rename(tmp.Name(), dbPath); err != nil {
+ log.Fatalln(err)
}
- return string(chars)
}
func main() {
- dbPath := flag.String("db", ".glocate.db", "Path to state file (database)")
- doIndex := flag.Bool("index", false, "Initialize database")
- doUpdate := flag.Bool("update", false, "Update database by zfs-diff's output")
- showBeauty := flag.Bool("show-beauty", false, "Show beauty human-friendly listing")
- showSimple := flag.Bool("show-simple", false, "Show simple listing")
- stripPrefix := flag.String("strip-prefix", "", "Strip prefix from zfs-diff's output")
+ dbPath := flag.String("db", "glocate.db", "Path to database")
+ doIndex := flag.Bool("index", false, "Perform indexing")
+ doUpdate := flag.Bool("update", false, "Feed zfs-diff and update the database")
+ strip := flag.String("strip", "", "Strip prefix from zfs-diff's paths")
+ showMachine := flag.Bool("machine", false, "Show machine friendly")
+ showTree := flag.Bool("tree", false, "Show human-friendly tree")
+ dryRun := flag.Bool("n", false, "Dry run, do not overwrite database")
flag.Parse()
log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds | log.Lshortfile)
if *doIndex {
- files, size, err := walk(".")
- if err != nil {
- log.Fatalln(err)
- }
- db := File{Name: "./", Size: size, Files: files}
- err = db.dump(*dbPath)
- if err != nil {
- log.Fatalln(err)
+ tmp := index()
+ tmp.Close()
+ if !*dryRun {
+ dbCommit(*dbPath, tmp)
}
return
}
- db := load(*dbPath)
if *doUpdate {
- scanner := bufio.NewScanner(os.Stdin)
- var t string
- for scanner.Scan() {
- t = scanner.Text()
- if len(t) == 0 {
- continue
- }
- cols := strings.Split(t, "\t")
- if len(cols) < 2 {
- log.Fatalln("bad zfs-diff format")
- }
- switch cols[0] {
- case "-":
- name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix))
- if err := db.remove(name); err != nil {
- log.Println("can not -:", name, ":", err)
- }
- case "+":
- name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix))
- if err := db.add(name); err != nil {
- log.Println("can not +:", name, ":", err)
- }
- case "M":
- name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix))
- if name == "" {
- continue
- }
- file, _, _, err := db.find(name)
- if err != nil {
- log.Println("can not M:", name, ":", err)
- continue
- }
- info, err := os.Stat(name)
- if err != nil {
- log.Println("can not M:", name, ":", err)
- continue
- }
- if info.Mode().IsRegular() {
- file.Size = uint64(info.Size())
- }
- file.Mtime = info.ModTime().Unix()
- case "R":
- if len(cols) != 3 {
- log.Fatalln("bad zfs-diff format for R")
- }
- name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix))
- if err := db.remove(name); err != nil {
- log.Println("can not R-:", name, ":", err)
- continue
- }
- name = deoctalize(strings.TrimPrefix(cols[2], *stripPrefix))
- if err := db.add(name); err != nil {
- log.Println("can not R+:", name, ":", err)
- }
- default:
- log.Fatalln("bad zfs-diff format")
- }
- }
- if err := scanner.Err(); err != nil {
- log.Fatalln(err)
- }
- if err := db.dump(*dbPath); err != nil {
- log.Fatalln(err)
+ tmp := updateWithDiff(*dbPath, *strip)
+ tmp.Close()
+ if !*dryRun {
+ dbCommit(*dbPath, tmp)
}
return
}
- veryFirst := true
- if len(flag.Args()) > 0 {
- root := flag.Args()[0]
- if root[:2] == "./" {
- root = root[2:]
- }
- if root[len(root)-1:] == "/" {
- root = root[:len(root)-1]
- }
- file, _, _, err := db.find(root)
- if err != nil {
- log.Fatalln(err)
- }
- db = file
- db.Name = root + "/"
- veryFirst = false
+ db, err := os.Open(*dbPath)
+ if err != nil {
+ log.Fatalln(err)
}
+ entsReader := make(chan Ent, 1<<10)
+ go reader(bufio.NewReaderSize(db, 1<<17), entsReader)
- if *showBeauty {
- db.listBeauty("", 0, false, veryFirst)
- return
+ entsPrinter := make(chan Ent, 1<<10)
+ printerJob := make(chan struct{})
+ go func() {
+ if *showMachine {
+ printerMachine(entsPrinter)
+ } else if *showTree {
+ printerTree(entsPrinter)
+ } else {
+ printerSimple(entsPrinter)
+ }
+ close(printerJob)
+ }()
+
+	var root []string
+	if len(flag.Args()) > 0 {
+		arg := strings.TrimPrefix(flag.Arg(0), "./")
+		arg = strings.TrimSuffix(arg, "/")
+		root = strings.Split("./"+arg, "/")
+	}
- if *showSimple {
- db.listSimple("", veryFirst)
- return
+
+ rootMet := false
+ for ent := range entsReader {
+ if hasPrefix(ent.name, root) {
+ entsPrinter <- ent
+ rootMet = true
+ } else if rootMet {
+ break
+ }
}
- db.listFiles("", veryFirst)
+ close(entsPrinter)
+ <-printerJob
}
--- /dev/null
+package main
+
+import (
+ "log"
+ "path"
+ "strconv"
+ "strings"
+)
+
+func IsDir(s string) bool {
+ return s[len(s)-1] == '/'
+}
+
+type ByName [][]string
+
+func (a ByName) Len() int {
+ return len(a)
+}
+
+func (a ByName) Swap(i, j int) {
+ a[i], a[j] = a[j], a[i]
+}
+
+func (a ByName) Less(i, j int) bool {
+ return namesCmp(a[i], a[j]) < 0
+}
+
+func nameSplit(name string) []string {
+ cols := strings.Split(name, "/")
+ if IsDir(name) {
+ cols = cols[:len(cols)-1]
+ cols[len(cols)-1] += "/"
+ }
+ return cols
+}
+
+func nameJoin(name []string) (s string) {
+ s = path.Join(name...)
+ if IsDir(name[len(name)-1]) {
+ s += "/"
+ }
+ return
+}
+
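+// namesCmp compares component-split names part by part, ignoring the
+// trailing "/" directory marker, so files and directories collate
+// identically.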
+func namesCmp(n1, n2 []string) int {
+ min := len(n1)
+ if len(n2) < min {
+ min = len(n2)
+ }
+ var t1, t2 string
+ for i := 0; i < min; i++ {
+ t1 = strings.TrimSuffix(n1[i], "/")
+ t2 = strings.TrimSuffix(n2[i], "/")
+ if t1 < t2 {
+ return -1
+ }
+ if t1 > t2 {
+ return +1
+ }
+ }
+ if len(n1) > len(n2) {
+ return +1
+ }
+ if len(n1) < len(n2) {
+ return -1
+ }
+ return 0
+}
+
+func hasPrefix(name, prefix []string) bool {
+ if len(name) < len(prefix) {
+ return false
+ }
+ return namesCmp(name[:len(prefix)], prefix) == 0
+}
+
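+// deoctalize decodes the \NNN octal escapes that zfs-diff uses for
+// non-printable bytes in path names.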
+func deoctalize(s string) string {
+ chars := make([]byte, 0, len(s))
+ for i := 0; i < len(s); i++ {
+ if s[i] == '\\' {
+ b, err := strconv.ParseUint("0"+s[i+1:i+1+3], 0, 8)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ chars = append(chars, byte(b))
+ i += 3
+ } else {
+ chars = append(chars, s[i])
+ }
+ }
+ return string(chars)
+}
--- /dev/null
+package main
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/dustin/go-humanize"
+)
+
+func printerSimple(ents chan Ent) {
+ for ent := range ents {
+ fmt.Println(nameJoin(ent.name))
+ }
+}
+
+func printerMachine(ents chan Ent) {
+ for ent := range ents {
+ fmt.Println(
+ strconv.FormatUint(uint64(ent.size), 10),
+ time.Unix(int64(ent.mtime), 0).Format("2006-01-02T15:04:05"),
+ nameJoin(ent.name),
+ )
+ }
+}
+
+type TreePrintEnt struct {
+ ent Ent
+ isLast bool
+}
+
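+// laster delays each entry by one, so it can tell whether an entry is
+// the last one inside its directory (the following entry is
+// shallower).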
+func laster(ents chan Ent, trees chan TreePrintEnt) {
+ entPrev := <-ents
+ for ent := range ents {
+ tree := TreePrintEnt{ent: entPrev}
+ if len(ent.name) < len(entPrev.name) {
+ tree.isLast = true
+ }
+ trees <- tree
+ entPrev = ent
+ }
+	trees <- TreePrintEnt{ent: entPrev, isLast: true}
+ close(trees)
+}
+
+func printerTree(ents chan Ent) {
+ trees := make(chan TreePrintEnt, 1<<10)
+ go laster(ents, trees)
+ first := true
+ var box string
+ for ent := range trees {
+ if first {
+ fmt.Printf(
+ "%s\t[%s]\n", nameJoin(ent.ent.name),
+ humanize.IBytes(uint64(ent.ent.size)),
+ )
+ first = false
+ continue
+ }
+ if ent.isLast {
+ box = "└"
+ } else {
+ box = "├"
+ }
+ fmt.Printf("%s%s %s\t[%s] %s\n",
+ strings.Repeat("│ ", len(ent.ent.name)-2), box,
+ nameJoin(ent.ent.name), humanize.IBytes(uint64(ent.ent.size)),
+ time.Unix(ent.ent.mtime, 0).Format("2006-01-02"),
+ )
+ }
+}
--- /dev/null
+package main
+
+import (
+ "encoding/binary"
+ "io"
+ "log"
+)
+
+func mustReadFull(r io.Reader, buf []byte) {
+ if _, err := io.ReadFull(r, buf); err != nil {
+ log.Fatalln(err)
+ }
+}
+
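+// reader decodes database records, reconstructing each entry's full
+// component-split name from the stored depth bytes, and sends the
+// result to sink.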
+func reader(r io.Reader, sink chan Ent) {
+ var err error
+ num := make([]byte, 8)
+ var cols []string
+ var namePrev string
+ var nameLen uint16
+ var depth, depthPrev uint8
+ for {
+ _, err = io.ReadFull(r, num[:2])
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ log.Fatalln(err)
+ }
+ nameLen = binary.BigEndian.Uint16(num[:2])
+ nameRaw := make([]byte, nameLen)
+ mustReadFull(r, nameRaw)
+ name := string(nameRaw)
+ mustReadFull(r, num[:1])
+ depth = uint8(num[0])
+ mustReadFull(r, num)
+ ent := Ent{mtime: int64(binary.BigEndian.Uint64(num))}
+ mustReadFull(r, num)
+ ent.size = int64(binary.BigEndian.Uint64(num))
+ if depth > depthPrev {
+ cols = append(cols, namePrev[:len(namePrev)-1])
+ } else if depth < depthPrev {
+ cols = cols[:len(cols)-int(depthPrev-depth)]
+ }
+ ent.name = append([]string{}, append(cols, name)...)
+ sink <- ent
+ namePrev = name
+ depthPrev = depth
+ }
+ close(sink)
+}
--- /dev/null
+package main
+
+import (
+ "fmt"
+ "time"
+)
+
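+// walkerStatus prints walking progress once per second until stopped.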
+func walkerStatus(stop chan struct{}) {
+ tick := time.Tick(time.Second)
+ for {
+ fmt.Printf("\r%d files %d directories", WalkerFiles, WalkerDirs)
+ select {
+ case <-tick:
+ case <-stop:
+ close(stop)
+ return
+ }
+ }
+}
--- /dev/null
+package main
+
+import (
+ "io/fs"
+ "log"
+ "os"
+ "path"
+)
+
+var (
+ WalkerFiles int64
+ WalkerDirs int64
+)
+
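+// walker recursively walks root (os.ReadDir already returns sorted
+// entries) and sends every entry to sink, marking directories with a
+// trailing "/".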
+func walker(sink chan Ent, root []string) error {
+ files, err := os.ReadDir(path.Join(root...)) // it is already sorted
+ if err != nil {
+ return err
+ }
+ var info fs.FileInfo
+ ents := make([]Ent, 0, len(files))
+ for _, file := range files {
+ ent := Ent{name: append([]string{}, append(root, file.Name())...)}
+ info, err = file.Info()
+ if err == nil {
+ if info.IsDir() {
+ ent.name[len(ent.name)-1] += "/"
+ } else if info.Mode().IsRegular() {
+ ent.size = info.Size()
+ }
+ ent.mtime = info.ModTime().Unix()
+ } else {
+ log.Println("can not stat:", path.Join(ent.name...), ":", err)
+ }
+ ents = append(ents, ent)
+ }
+ for _, ent := range ents {
+ sink <- ent
+ if ent.IsDir() {
+ WalkerDirs++
+ } else {
+ WalkerFiles++
+ continue
+ }
+ err = walker(sink, ent.name)
+ if err != nil {
+			log.Println("can not walk:", path.Join(ent.name...), ":", err)
+ continue
+ }
+ }
+ return nil
+}
--- /dev/null
+package main
+
+import (
+ "encoding/binary"
+ "io"
+ "log"
+)
+
+func mustWrite(w io.Writer, buf []byte) {
+ if _, err := w.Write(buf); err != nil {
+ log.Fatalln(err)
+ }
+}
+
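+// writer serializes entries into the database format: 16-bit BE name
+// length, name, depth byte, 64-bit BE mtime and size.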
+func writer(w io.Writer, sink chan Ent) {
+ num := make([]byte, 8)
+ var name string
+ for ent := range sink {
+ name = ent.name[len(ent.name)-1]
+		if len(name) >= 1<<16 {
+ panic("too long")
+ }
+ binary.BigEndian.PutUint16(num[:2], uint16(len(name)))
+ mustWrite(w, num[:2])
+ mustWrite(w, []byte(name))
+ if len(ent.name) >= 1<<8 {
+ panic("too deep")
+ }
+ mustWrite(w, []byte{byte(len(ent.name) - 1)})
+ binary.BigEndian.PutUint64(num, uint64(ent.mtime))
+ mustWrite(w, num)
+ binary.BigEndian.PutUint64(num, uint64(ent.size))
+ mustWrite(w, num)
+ }
+}