From d5b8c235a1f3088c6c1e7261df3d1b565d042db2ba2ad1bbd1018782b9178e1f Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Mon, 8 Aug 2022 20:03:55 +0300 Subject: [PATCH] Improved version --- README | 85 ++++++++++ diff.go | 254 +++++++++++++++++++++++++++++ dirsizer.go | 94 +++++++++++ go.mod | 5 +- go.sum | 2 - index.go | 55 +++++++ main.go | 447 +++++++--------------------------------------------- names.go | 92 +++++++++++ printers.go | 72 +++++++++ reader.go | 51 ++++++ status.go | 19 +++ walker.go | 52 ++++++ writer.go | 35 ++++ 13 files changed, 868 insertions(+), 395 deletions(-) create mode 100644 diff.go create mode 100644 dirsizer.go create mode 100644 index.go create mode 100644 names.go create mode 100644 printers.go create mode 100644 reader.go create mode 100644 status.go create mode 100644 walker.go create mode 100644 writer.go diff --git a/README b/README index 5e946c2..358d530 100644 --- a/README +++ b/README @@ -1,4 +1,89 @@ glocate -- ZFS-diff-friendly locate-like utility +This utility is intended to keep a database of the filesystem hierarchy +and quickly display some part of it, like ordinary *locate utilities. +But unlike the others, it is able to eat zfs-diff's output and apply the +changes to an existing database. + +Why did I write it? Indexing, or just "find /big", can take a considerable +amount of time, like an hour or so, with many I/O operations spent. But +my home NAS has a relatively small number of changes made every day. The +only possible quick way to determine what exactly was modified is to +traverse ZFS's Merkle trees to find the difference between +snapshots. Fortunately, the zfs-diff command does exactly that, providing +pretty machine-friendly output. + +Why is this utility so complicated? Initially it kept the whole database in +memory, but that takes 2-3 GiB of memory, which is a huge amount. Moreover, +it had to load it fully to perform even a basic search. So the current +implementation uses temporary files and makes heavy use of data streaming. 
+ +Its storage format is trivial: + +* 16-bit BE size of the following name +* entity (file, directory, symbolic link, etc) name itself. + A directory has a trailing "/" +* single byte indicating the current file's depth +* 64-bit BE mtime seconds +* 64-bit BE file or directory (sum of all files and directories) size + +Its indexing algorithm is the following: + +* traverse the whole filesystem hierarchy in a *sorted* order. All + records are written to a temporary file, without directory sizes, + because they are not known in advance during the walk +* during the walk, remember in memory each directory's total size +* read all records from that temporary file, writing to another one, but + replacing directory sizes with the remembered ones + +Searching is trivial: + +* there is no actual searching, just sequential streaming through the + whole database file +* if some root is specified, then the program will output only its + hierarchy path, exiting after it is finished + +The updating algorithm is the following: + +* read all [-+MR] actions from zfs-diff, validating the whole format +* each file's "R" becomes "-" and "+" actions +* if there are directory "R"s, then collect them and stream the current + database to determine each path entity you have to "-" and "+" +* each "+" adds an entry to the list of "M"s +* sort all "-", "+" and "M" filenames in ascending order +* get the entity's information for each "M" (remembering its size and mtime) +* stream the current database records, writing them to a temporary file +* if a record exists in the "-"-list, then skip it +* if any "+" exists in the *sorted* list, that sorts before the + record from the database, then insert it into the stream, taking size and + mtime information from the "M"-list +* if any "M" exists for the read record, then use it to alter it +* all that time, the directory size calculating algorithm also runs, the + same one used during indexing +* create another temporary file to copy the records with updated + directory sizes + +How to 
use it? + + $ zfs snap big@snap1 + $ cd /big ; glocate -db /tmp/glocate.db -index + + $ glocate -db /tmp/glocate.db + [list of all files] + + $ glocate -db /tmp/glocate.db -machine + [machine-parseable list of files with sizes and mtimes] + + $ glocate -db /tmp/glocate.db -tree + [pretty tree-like list of files with sizes and mtimes] + + $ glocate -db /tmp/glocate.db some/sub/path + [just a part of the whole hierarchy] + +and update it carefully: + + $ zfs snap big@snap2 + $ zfs diff -FH big@snap1 big@snap2 | glocate -db /tmp/glocate.db -strip /big/ -update + glocate is copylefted free software: see the file COPYING for copying conditions. diff --git a/diff.go b/diff.go new file mode 100644 index 0000000..61ba1ac --- /dev/null +++ b/diff.go @@ -0,0 +1,254 @@ +package main + +import ( + "bufio" + "io" + "log" + "os" + "sort" + "strings" +) + +type Ren struct { + src []string + dst []string +} + +type BySrc []Ren + +func (a BySrc) Len() int { + return len(a) +} + +func (a BySrc) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a BySrc) Less(i, j int) bool { + return namesCmp(a[i].src, a[j].src) < 0 +} + +type EntByName []*Ent + +func (a EntByName) Len() int { + return len(a) +} + +func (a EntByName) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a EntByName) Less(i, j int) bool { + return namesCmp(a[i].name, a[j].name) < 0 +} + +func updateWithDiff(dbPath, strip string) *os.File { + scanner := bufio.NewScanner(os.Stdin) + var t string + var delsNames []string + var addsNames []string + var modsNames []string + var rens []Ren + var isDir bool + for scanner.Scan() { + t = scanner.Text() + if len(t) == 0 { + continue + } + cols := strings.Split(t, "\t") + if len(cols) < 3 { + log.Fatalln("bad zfs-diff format") + } + isDir = cols[1] == "/" + name := deoctalize(strings.TrimPrefix(cols[2], strip)) + if name == "" { + continue + } + name = "./" + name + if isDir { + name += "/" + } + switch cols[0] { + case "-": + delsNames = append(delsNames, name) + case "+": + 
addsNames = append(addsNames, name) + case "M": + modsNames = append(modsNames, name) + case "R": + if len(cols) != 4 { + log.Fatalln("bad zfs-diff format for R") + } + dst := "./" + deoctalize(strings.TrimPrefix(cols[3], strip)) + if isDir { + dst += "/" + rens = append(rens, Ren{ + src: nameSplit(name), + dst: nameSplit(dst), + }) + } else { + delsNames = append(delsNames, name) + addsNames = append(addsNames, dst) + } + default: + log.Fatalln("bad zfs-diff format") + } + } + + entsReader := make(chan Ent, 1<<10) + db, err := os.Open(dbPath) + if err != nil { + log.Fatalln(err) + } + dels := make([][]string, 0, len(delsNames)+len(rens)) + adds := make([][]string, 0, len(addsNames)+len(rens)) + mods := make([]*Ent, 0, len(modsNames)+len(rens)) + if len(rens) > 0 { + sort.Sort(BySrc(rens)) + go reader(bufio.NewReaderSize(db, 1<<17), entsReader) + var ent Ent + var ok, met bool + for { + ent, ok = <-entsReader + if !ok { + break + } + Retry: + if len(rens) > 0 { + if hasPrefix(ent.name, rens[0].src) { + dels = append(dels, ent.name) + dst := append( + append([]string{}, rens[0].dst...), + ent.name[len(rens[0].src):]..., + ) + adds = append(adds, dst) + mods = append(mods, &Ent{name: dst}) + if !met { + // strip "/" from prefix directory + dst := rens[0].dst + last := dst[len(dst)-1] + dst[len(dst)-1] = last[:len(last)-1] + met = true + } + } else if met { + met = false + rens = rens[1:] + goto Retry + } + } + } + rens = nil + } + + for _, name := range delsNames { + dels = append(dels, nameSplit(name)) + } + delsNames = nil + sort.Sort(ByName(dels)) + + for _, name := range addsNames { + adds = append(adds, nameSplit(name)) + modsNames = append(modsNames, name) + } + addsNames = nil + sort.Sort(ByName(adds)) + + for _, name := range modsNames { + mods = append(mods, &Ent{name: nameSplit(name)}) + } + modsNames = nil + sort.Sort(EntByName(mods)) + var info os.FileInfo + for _, ent := range mods { + info, err = os.Stat(nameJoin(ent.name)) + if err != nil { + 
log.Println("can not stat:", nameJoin(ent.name), ":", err) + continue + } + if info.Mode().IsRegular() { + ent.size = info.Size() + } + ent.mtime = info.ModTime().Unix() + } + + _, err = db.Seek(0, io.SeekStart) + if err != nil { + log.Fatalln(err) + } + tmp0, err := os.CreateTemp("", "glocate-idx") + if err != nil { + log.Fatalln(err) + } + defer os.Remove(tmp0.Name()) + entsReader = make(chan Ent, 1<<10) + entsDirSizer := make(chan Ent, 1<<10) + entsWriter := make(chan Ent, 1<<10) + go reader(bufio.NewReaderSize(db, 1<<17), entsReader) + + dirSizerJob := make(chan struct{}) + var dirSizes []int64 + sinkBack := make(chan Ent, 1) + go func() { + dirSizer(&dirSizes, 1, sinkBack, entsDirSizer, entsWriter) + close(dirSizerJob) + }() + + bw := bufio.NewWriterSize(tmp0, 1<<17) + writerJob := make(chan struct{}) + go func() { + writer(bw, entsWriter) + close(writerJob) + }() + + for ent := range entsReader { + if len(dels) > 0 && namesCmp(ent.name, dels[0]) == 0 { + dels = dels[1:] + continue + } + for len(adds) > 0 && namesCmp(adds[0], ent.name) < 0 { + if namesCmp(mods[0].name, adds[0]) != 0 { + panic("+ and M lists are out of sync") + } + newEnt := Ent{ + name: adds[0], + mtime: mods[0].mtime, + size: mods[0].size, + } + entsDirSizer <- newEnt + adds = adds[1:] + mods = mods[1:] + } + if len(mods) > 0 && namesCmp(ent.name, mods[0].name) == 0 { + ent.mtime = mods[0].mtime + ent.size = mods[0].size + mods = mods[1:] + } + entsDirSizer <- ent + } + for len(adds) > 0 { + if namesCmp(mods[0].name, adds[0]) != 0 { + panic("+ and M lists are out of sync") + } + newEnt := Ent{ + name: adds[0], + mtime: mods[0].mtime, + size: mods[0].size, + } + entsDirSizer <- newEnt + adds = adds[1:] + mods = mods[1:] + } + + close(entsDirSizer) + <-dirSizerJob + close(entsWriter) + <-writerJob + if err = bw.Flush(); err != nil { + log.Fatalln(err) + } + + tmp1 := applyDirSizes(tmp0, dirSizes) + tmp0.Close() + os.Remove(tmp0.Name()) + return tmp1 +} diff --git a/dirsizer.go b/dirsizer.go new 
file mode 100644 index 0000000..b4f5b4e --- /dev/null +++ b/dirsizer.go @@ -0,0 +1,94 @@ +package main + +import ( + "bufio" + "encoding/binary" + "io" + "log" + "os" +) + +func dirSizer(dirSizes *[]int64, depth int, sinkBack, sinkIn, sinkOut chan Ent) (curSize int64) { + var ent Ent + var opened bool + var dirIdx int + for { + select { + case ent = <-sinkBack: + goto Got + default: + } + ent, opened = <-sinkIn + if !opened { + break + } + Got: + if len(ent.name) < depth { + sinkBack <- ent + return + } + sinkOut <- ent + curSize += ent.size + if !ent.IsDir() { + continue + } + dirIdx = len(*dirSizes) + (*dirSizes) = append(*dirSizes, 0) + dirSize := dirSizer(dirSizes, depth+1, sinkBack, sinkIn, sinkOut) + (*dirSizes)[dirIdx] = dirSize + curSize += dirSize + } + return +} + +func applyDirSizes(src *os.File, dirSizes []int64) *os.File { + _, err := src.Seek(0, io.SeekStart) + if err != nil { + log.Fatalln(err) + } + tmp, err := os.CreateTemp("", "glocate-idx") + if err != nil { + log.Fatalln(err) + } + + br := bufio.NewReaderSize(src, 1<<17) + num := make([]byte, 8) + var nameLen int + name := make([]byte, 0, 1<<16) + bw := bufio.NewWriterSize(tmp, 1<<17) + var dirIdx int + for { + if _, err = io.ReadFull(br, num[:2]); err != nil { + if err == io.EOF { + break + } + log.Fatalln(err) + } + mustWrite(bw, num[:2]) + nameLen = int(binary.BigEndian.Uint16(num[:2])) + name = name[:nameLen] + if _, err = io.ReadFull(br, name); err != nil { + log.Fatalln(err) + } + mustWrite(bw, name) + if _, err = io.CopyN(bw, br, 1+8); err != nil { + log.Fatalln(err) + } + if name[len(name)-1] == byte('/') { + if _, err = br.Discard(8); err != nil { + log.Fatalln(err) + } + binary.BigEndian.PutUint64(num, uint64(dirSizes[dirIdx])) + mustWrite(bw, num) + dirIdx++ + } else { + if _, err = io.CopyN(bw, br, 8); err != nil { + log.Fatalln(err) + } + } + } + if err = bw.Flush(); err != nil { + log.Fatalln(err) + } + return tmp +} diff --git a/go.mod b/go.mod index 21a56ac..f8654fc 100644 --- 
a/go.mod +++ b/go.mod @@ -2,7 +2,4 @@ module go.stargrave.org/glocate go 1.18 -require ( - github.com/dustin/go-humanize v1.0.0 - github.com/klauspost/compress v1.15.8 -) +require github.com/dustin/go-humanize v1.0.0 diff --git a/go.sum b/go.sum index b9239e0..4f89ea4 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,2 @@ github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/klauspost/compress v1.15.8 h1:JahtItbkWjf2jzm/T+qgMxkP9EMHsqEUA6vCMGmXvhA= -github.com/klauspost/compress v1.15.8/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= diff --git a/index.go b/index.go new file mode 100644 index 0000000..c6fa539 --- /dev/null +++ b/index.go @@ -0,0 +1,55 @@ +package main + +import ( + "bufio" + "fmt" + "log" + "os" +) + +func index() *os.File { + tmp0, err := os.CreateTemp("", "glocate-idx") + if err != nil { + log.Fatalln(err) + } + defer os.Remove(tmp0.Name()) + + entsWalker := make(chan Ent, 1<<10) + entsWriter := make(chan Ent, 1<<10) + dirSizerJob := make(chan struct{}) + var dirSizes []int64 + entsWalker <- Ent{name: []string{"./"}} + sinkBack := make(chan Ent, 1) + go func() { + dirSizer(&dirSizes, 1, sinkBack, entsWalker, entsWriter) + close(dirSizerJob) + }() + + bw := bufio.NewWriterSize(tmp0, 1<<17) + writerJob := make(chan struct{}) + go func() { + writer(bw, entsWriter) + close(writerJob) + }() + + walkerStatusStop := make(chan struct{}) + go walkerStatus(walkerStatusStop) + err = walker(entsWalker, []string{"./"}) + walkerStatusStop <- struct{}{} + <-walkerStatusStop + fmt.Print("\r") + if err != nil { + log.Fatalln(err) + } + close(entsWalker) + <-dirSizerJob + close(entsWriter) + <-writerJob + if err = bw.Flush(); err != nil { + log.Fatalln(err) + } + + tmp1 := applyDirSizes(tmp0, dirSizes) + tmp0.Close() + return tmp1 +} diff --git a/main.go b/main.go index d6abfc4..a5fcb41 100644 --- a/main.go +++ b/main.go @@ -1,429 
+1,98 @@ -/* -glocate -- ZFS-diff-friendly locate-like utility -Copyright (C) 2022 Sergey Matveev - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - package main import ( "bufio" - "encoding/gob" "flag" - "fmt" - "io" - "io/fs" "log" "os" - "path" - "sort" - "strconv" "strings" "syscall" - "time" - - "github.com/dustin/go-humanize" - "github.com/klauspost/compress/zstd" ) -type File struct { - Name string - Size uint64 - Mtime int64 - Files []File +type Ent struct { + name []string + mtime int64 + size int64 } -type ByName []File - -func (a ByName) Len() int { - return len(a) -} - -func (a ByName) Swap(i, j int) { - a[i], a[j] = a[j], a[i] -} - -func (a ByName) Less(i, j int) bool { - return a[i].Name < a[j].Name -} - -func (file *File) IsDir() bool { - return file.Name[len(file.Name)-1] == '/' -} - -func walk(root string) ([]File, uint64, error) { - fd, err := os.Open(root) - if err != nil { - return nil, 0, err - } - var files []File - var size uint64 - var info fs.FileInfo - for { - ents, err := fd.ReadDir(1 << 10) - if err != nil { - if err == io.EOF { - break - } - fd.Close() - return nil, 0, err - } - for _, ent := range ents { - file := File{Name: ent.Name()} - fullPath := path.Join(root, file.Name) - if ent.IsDir() { - file.Name += "/" - } - info, err = ent.Info() - if err != nil { - log.Println("can not stat:", fullPath, ":", err) - files = append(files, file) - continue - } - file.Mtime = info.ModTime().Unix() - if ent.IsDir() { - file.Files, file.Size, err = 
walk(fullPath) - if err != nil { - log.Println("can not walk:", fullPath, ":", err) - files = append(files, file) - continue - } - } else if info.Mode().IsRegular() { - file.Size = uint64(info.Size()) - } - files = append(files, file) - size += file.Size - } - } - fd.Close() - sort.Sort(ByName(files)) - return files, size, nil -} - -func usage() { - log.Println("usage") - os.Exit(1) -} - -func load(dbPath string) *File { - fd, err := os.Open(dbPath) - if err != nil { - log.Fatalln(err) - } - defer fd.Close() - comp, err := zstd.NewReader(fd) - if err != nil { - log.Fatalln(err) - } - dec := gob.NewDecoder(comp) - var file File - err = dec.Decode(&file) - if err != nil { - log.Fatalln(err) - } - comp.Close() - return &file +func (ent *Ent) IsDir() bool { + return IsDir(ent.name[len(ent.name)-1]) } -func (db *File) dump(dbPath string) error { - tmp, err := os.CreateTemp(path.Dir(dbPath), "glocate") - if err != nil { - return err - } - defer os.Remove(tmp.Name()) - comp, err := zstd.NewWriter( - tmp, zstd.WithEncoderLevel(zstd.SpeedBestCompression), - ) - if err != nil { - return err - } - enc := gob.NewEncoder(comp) - err = enc.Encode(db) - if err != nil { - return err - } - err = comp.Close() - if err != nil { - return err - } - err = tmp.Close() - if err != nil { - return err - } +func dbCommit(dbPath string, tmp *os.File) { umask := syscall.Umask(0) syscall.Umask(umask) - err = os.Chmod(tmp.Name(), os.FileMode(0666&^umask)) - if err != nil { - return err - } - return os.Rename(tmp.Name(), dbPath) -} - -func (file *File) listBeauty(indent string, n int, isLast, veryFirst bool) { - if veryFirst { - fmt.Printf("[%s]\n", humanize.IBytes(file.Size)) - } else { - var box string - if isLast { - box = "└" - } else { - box = "├" - } - name := file.Name - fmt.Printf("%s%s %s\t№%d [%s] %s\n", - indent, box, name, n, humanize.IBytes(file.Size), - time.Unix(file.Mtime, 0).Format("2006-01-02"), - ) - if isLast { - indent += " " - } else { - indent += "│ " - } - } - for n, f := 
range file.Files { - n++ - f.listBeauty(indent, n, n == len(file.Files), false) - } -} - -func (file *File) listSimple(root string, veryFirst bool) { - name := file.Name - fmt.Println( - strconv.FormatUint(file.Size, 10), - time.Unix(file.Mtime, 0).Format("2006-01-02T15:04:05"), - root+name, - ) - if veryFirst { - name = "" - } - for _, f := range file.Files { - f.listSimple(root+name, false) - } -} - -func (file *File) listFiles(root string, veryFirst bool) { - name := file.Name - if veryFirst { - root = "" - } else { - fmt.Println(root + name) - root += name - } - for _, f := range file.Files { - f.listFiles(root, false) - } -} - -func (db *File) find(p string) (file *File, parents []*File, idx int, err error) { - file = db - var f File -Entities: - for _, ent := range strings.Split(p, "/") { - for idx, f = range file.Files { - if (ent == f.Name) || (ent+"/" == f.Name) { - parents = append(parents, file) - file = &f - continue Entities - } - } - err = fmt.Errorf("no entity found: %s", ent) - return - } - return -} - -func (db *File) remove(p string) error { - file, parents, idx, err := db.find(p) - if err != nil { - return err - } - lastParent := parents[len(parents)-1] - lastParent.Files = append( - lastParent.Files[:idx], - lastParent.Files[idx+1:]..., - ) - for _, parent := range parents { - parent.Size -= file.Size - } - return nil -} - -func (db *File) add(p string) error { - cols := strings.Split(p, "/") - cols, name := cols[:len(cols)-1], cols[len(cols)-1] - var parent *File - var err error - if len(cols) != 0 { - parent, _, _, err = db.find(path.Join(cols...)) - if err != nil { - return err - } - } else { - parent = db - } - info, err := os.Stat(p) - if err != nil { - return err - } - if info.IsDir() { - name += "/" - } - file := File{ - Name: name, - Size: uint64(info.Size()), - Mtime: info.ModTime().Unix(), + if err := os.Chmod(tmp.Name(), os.FileMode(0666&^umask)); err != nil { + log.Fatalln(err) } - parent.Files = append(parent.Files, file) - 
sort.Sort(ByName(parent.Files)) - parent.Size += file.Size - return nil -} - -func deoctalize(s string) string { - chars := make([]byte, 0, len(s)) - for i := 0; i < len(s); i++ { - if s[i] == '\\' { - b, err := strconv.ParseUint("0"+s[i+1:i+1+3], 0, 8) - if err != nil { - log.Fatalln(err) - } - chars = append(chars, byte(b)) - i += 3 - } else { - chars = append(chars, s[i]) - } + if err := os.Rename(tmp.Name(), dbPath); err != nil { + log.Fatalln(err) } - return string(chars) } func main() { - dbPath := flag.String("db", ".glocate.db", "Path to state file (database)") - doIndex := flag.Bool("index", false, "Initialize database") - doUpdate := flag.Bool("update", false, "Update database by zfs-diff's output") - showBeauty := flag.Bool("show-beauty", false, "Show beauty human-friendly listing") - showSimple := flag.Bool("show-simple", false, "Show simple listing") - stripPrefix := flag.String("strip-prefix", "", "Strip prefix from zfs-diff's output") + dbPath := flag.String("db", "glocate.db", "Path to database") + doIndex := flag.Bool("index", false, "Perform indexing") + doUpdate := flag.Bool("update", false, "Feed zfs-diff and update the database") + strip := flag.String("strip", "", "Strip prefix from zfs-diff's paths") + showMachine := flag.Bool("machine", false, "Show machine friendly") + showTree := flag.Bool("tree", false, "Show human-friendly tree") + dryRun := flag.Bool("n", false, "Dry run, do not overwrite database") flag.Parse() log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds | log.Lshortfile) if *doIndex { - files, size, err := walk(".") - if err != nil { - log.Fatalln(err) - } - db := File{Name: "./", Size: size, Files: files} - err = db.dump(*dbPath) - if err != nil { - log.Fatalln(err) + tmp := index() + tmp.Close() + if !*dryRun { + dbCommit(*dbPath, tmp) } return } - db := load(*dbPath) if *doUpdate { - scanner := bufio.NewScanner(os.Stdin) - var t string - for scanner.Scan() { - t = scanner.Text() - if len(t) == 0 { - continue - } - cols 
:= strings.Split(t, "\t") - if len(cols) < 2 { - log.Fatalln("bad zfs-diff format") - } - switch cols[0] { - case "-": - name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix)) - if err := db.remove(name); err != nil { - log.Println("can not -:", name, ":", err) - } - case "+": - name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix)) - if err := db.add(name); err != nil { - log.Println("can not +:", name, ":", err) - } - case "M": - name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix)) - if name == "" { - continue - } - file, _, _, err := db.find(name) - if err != nil { - log.Println("can not M:", name, ":", err) - continue - } - info, err := os.Stat(name) - if err != nil { - log.Println("can not M:", name, ":", err) - continue - } - if info.Mode().IsRegular() { - file.Size = uint64(info.Size()) - } - file.Mtime = info.ModTime().Unix() - case "R": - if len(cols) != 3 { - log.Fatalln("bad zfs-diff format for R") - } - name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix)) - if err := db.remove(name); err != nil { - log.Println("can not R-:", name, ":", err) - continue - } - name = deoctalize(strings.TrimPrefix(cols[2], *stripPrefix)) - if err := db.add(name); err != nil { - log.Println("can not R+:", name, ":", err) - } - default: - log.Fatalln("bad zfs-diff format") - } - } - if err := scanner.Err(); err != nil { - log.Fatalln(err) - } - if err := db.dump(*dbPath); err != nil { - log.Fatalln(err) + tmp := updateWithDiff(*dbPath, *strip) + tmp.Close() + if !*dryRun { + dbCommit(*dbPath, tmp) } return } - veryFirst := true - if len(flag.Args()) > 0 { - root := flag.Args()[0] - if root[:2] == "./" { - root = root[2:] - } - if root[len(root)-1:] == "/" { - root = root[:len(root)-1] - } - file, _, _, err := db.find(root) - if err != nil { - log.Fatalln(err) - } - db = file - db.Name = root + "/" - veryFirst = false + db, err := os.Open(*dbPath) + if err != nil { + log.Fatalln(err) } + entsReader := make(chan Ent, 1<<10) + go 
reader(bufio.NewReaderSize(db, 1<<17), entsReader) - if *showBeauty { - db.listBeauty("", 0, false, veryFirst) - return + entsPrinter := make(chan Ent, 1<<10) + printerJob := make(chan struct{}) + go func() { + if *showMachine { + printerMachine(entsPrinter) + } else if *showTree { + printerTree(entsPrinter) + } else { + printerSimple(entsPrinter) + } + close(printerJob) + }() + + var root []string + if len(flag.Args()) > 0 { + root = strings.Split("./"+flag.Arg(0), "/") } - if *showSimple { - db.listSimple("", veryFirst) - return + + rootMet := false + for ent := range entsReader { + if hasPrefix(ent.name, root) { + entsPrinter <- ent + rootMet = true + } else if rootMet { + break + } } - db.listFiles("", veryFirst) + close(entsPrinter) + <-printerJob } diff --git a/names.go b/names.go new file mode 100644 index 0000000..ad60435 --- /dev/null +++ b/names.go @@ -0,0 +1,92 @@ +package main + +import ( + "log" + "path" + "strconv" + "strings" +) + +func IsDir(s string) bool { + return s[len(s)-1] == '/' +} + +type ByName [][]string + +func (a ByName) Len() int { + return len(a) +} + +func (a ByName) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a ByName) Less(i, j int) bool { + return namesCmp(a[i], a[j]) < 0 +} + +func nameSplit(name string) []string { + cols := strings.Split(name, "/") + if IsDir(name) { + cols = cols[:len(cols)-1] + cols[len(cols)-1] += "/" + } + return cols +} + +func nameJoin(name []string) (s string) { + s = path.Join(name...) 
+ if IsDir(name[len(name)-1]) { + s += "/" + } + return +} + +func namesCmp(n1, n2 []string) int { + min := len(n1) + if len(n2) < min { + min = len(n2) + } + var t1, t2 string + for i := 0; i < min; i++ { + t1 = strings.TrimSuffix(n1[i], "/") + t2 = strings.TrimSuffix(n2[i], "/") + if t1 < t2 { + return -1 + } + if t1 > t2 { + return +1 + } + } + if len(n1) > len(n2) { + return +1 + } + if len(n1) < len(n2) { + return -1 + } + return 0 +} + +func hasPrefix(name, prefix []string) bool { + if len(name) < len(prefix) { + return false + } + return namesCmp(name[:len(prefix)], prefix) == 0 +} + +func deoctalize(s string) string { + chars := make([]byte, 0, len(s)) + for i := 0; i < len(s); i++ { + if s[i] == '\\' { + b, err := strconv.ParseUint("0"+s[i+1:i+1+3], 0, 8) + if err != nil { + log.Fatalln(err) + } + chars = append(chars, byte(b)) + i += 3 + } else { + chars = append(chars, s[i]) + } + } + return string(chars) +} diff --git a/printers.go b/printers.go new file mode 100644 index 0000000..d080508 --- /dev/null +++ b/printers.go @@ -0,0 +1,72 @@ +package main + +import ( + "fmt" + "strconv" + "strings" + "time" + + "github.com/dustin/go-humanize" +) + +func printerSimple(ents chan Ent) { + for ent := range ents { + fmt.Println(nameJoin(ent.name)) + } +} + +func printerMachine(ents chan Ent) { + for ent := range ents { + fmt.Println( + strconv.FormatUint(uint64(ent.size), 10), + time.Unix(int64(ent.mtime), 0).Format("2006-01-02T15:04:05"), + nameJoin(ent.name), + ) + } +} + +type TreePrintEnt struct { + ent Ent + isLast bool +} + +func laster(ents chan Ent, trees chan TreePrintEnt) { + entPrev := <-ents + for ent := range ents { + tree := TreePrintEnt{ent: entPrev} + if len(ent.name) < len(entPrev.name) { + tree.isLast = true + } + trees <- tree + entPrev = ent + } + trees <- TreePrintEnt{ent: entPrev} + close(trees) +} + +func printerTree(ents chan Ent) { + trees := make(chan TreePrintEnt, 1<<10) + go laster(ents, trees) + first := true + var box string + for 
ent := range trees { + if first { + fmt.Printf( + "%s\t[%s]\n", nameJoin(ent.ent.name), + humanize.IBytes(uint64(ent.ent.size)), + ) + first = false + continue + } + if ent.isLast { + box = "└" + } else { + box = "├" + } + fmt.Printf("%s%s %s\t[%s] %s\n", + strings.Repeat("│ ", len(ent.ent.name)-2), box, + nameJoin(ent.ent.name), humanize.IBytes(uint64(ent.ent.size)), + time.Unix(ent.ent.mtime, 0).Format("2006-01-02"), + ) + } +} diff --git a/reader.go b/reader.go new file mode 100644 index 0000000..269fc32 --- /dev/null +++ b/reader.go @@ -0,0 +1,51 @@ +package main + +import ( + "encoding/binary" + "io" + "log" +) + +func mustReadFull(r io.Reader, buf []byte) { + if _, err := io.ReadFull(r, buf); err != nil { + log.Fatalln(err) + } +} + +func reader(r io.Reader, sink chan Ent) { + var err error + num := make([]byte, 8) + var cols []string + var namePrev string + var nameLen uint16 + var depth, depthPrev uint8 + for { + _, err = io.ReadFull(r, num[:2]) + if err != nil { + if err == io.EOF { + break + } + log.Fatalln(err) + } + nameLen = binary.BigEndian.Uint16(num[:2]) + nameRaw := make([]byte, nameLen) + mustReadFull(r, nameRaw) + name := string(nameRaw) + mustReadFull(r, num[:1]) + depth = uint8(num[0]) + mustReadFull(r, num) + ent := Ent{mtime: int64(binary.BigEndian.Uint64(num))} + mustReadFull(r, num) + ent.size = int64(binary.BigEndian.Uint64(num)) + if depth > depthPrev { + cols = append(cols, namePrev[:len(namePrev)-1]) + } else if depth < depthPrev { + cols = cols[:len(cols)-int(depthPrev-depth)] + } + ent.name = append([]string{}, append(cols, name)...) 
+ sink <- ent + namePrev = name + depthPrev = depth + } + close(sink) +} diff --git a/status.go b/status.go new file mode 100644 index 0000000..342c3ba --- /dev/null +++ b/status.go @@ -0,0 +1,19 @@ +package main + +import ( + "fmt" + "time" +) + +func walkerStatus(stop chan struct{}) { + tick := time.Tick(time.Second) + for { + fmt.Printf("\r%d files %d directories", WalkerFiles, WalkerDirs) + select { + case <-tick: + case <-stop: + close(stop) + return + } + } +} diff --git a/walker.go b/walker.go new file mode 100644 index 0000000..4a91de7 --- /dev/null +++ b/walker.go @@ -0,0 +1,52 @@ +package main + +import ( + "io/fs" + "log" + "os" + "path" +) + +var ( + WalkerFiles int64 + WalkerDirs int64 +) + +func walker(sink chan Ent, root []string) error { + files, err := os.ReadDir(path.Join(root...)) // it is already sorted + if err != nil { + return err + } + var info fs.FileInfo + ents := make([]Ent, 0, len(files)) + for _, file := range files { + ent := Ent{name: append([]string{}, append(root, file.Name())...)} + info, err = file.Info() + if err == nil { + if info.IsDir() { + ent.name[len(ent.name)-1] += "/" + } else if info.Mode().IsRegular() { + ent.size = info.Size() + } + ent.mtime = info.ModTime().Unix() + } else { + log.Println("can not stat:", path.Join(ent.name...), ":", err) + } + ents = append(ents, ent) + } + for _, ent := range ents { + sink <- ent + if ent.IsDir() { + WalkerDirs++ + } else { + WalkerFiles++ + continue + } + err = walker(sink, ent.name) + if err != nil { + log.Println("can not stat:", path.Join(ent.name...), ":", err) + continue + } + } + return nil +} diff --git a/writer.go b/writer.go new file mode 100644 index 0000000..e12a19c --- /dev/null +++ b/writer.go @@ -0,0 +1,35 @@ +package main + +import ( + "encoding/binary" + "io" + "log" +) + +func mustWrite(w io.Writer, buf []byte) { + if _, err := w.Write(buf); err != nil { + log.Fatalln(err) + } +} + +func writer(w io.Writer, sink chan Ent) { + num := make([]byte, 8) + var name 
string + for ent := range sink { + name = ent.name[len(ent.name)-1] + if len(ent.name) >= 1<<16 { + panic("too long") + } + binary.BigEndian.PutUint16(num[:2], uint16(len(name))) + mustWrite(w, num[:2]) + mustWrite(w, []byte(name)) + if len(ent.name) >= 1<<8 { + panic("too deep") + } + mustWrite(w, []byte{byte(len(ent.name) - 1)}) + binary.BigEndian.PutUint64(num, uint64(ent.mtime)) + mustWrite(w, num) + binary.BigEndian.PutUint64(num, uint64(ent.size)) + mustWrite(w, num) + } +} -- 2.44.0