From: Sergey Matveev Date: Mon, 8 Aug 2022 17:03:55 +0000 (+0300) Subject: Improved version X-Git-Tag: v0.1.0~8 X-Git-Url: http://www.git.stargrave.org/?p=glocate.git;a=commitdiff_plain;h=d5b8c235a1f3088c6c1e7261df3d1b565d042db2ba2ad1bbd1018782b9178e1f Improved version --- diff --git a/README b/README index 5e946c2..358d530 100644 --- a/README +++ b/README @@ -1,4 +1,89 @@ glocate -- ZFS-diff-friendly locate-like utility +This utility is intended to keep a database of the filesystem hierarchy +and quickly display some part of it, like ordinary *locate utilities. +But unlike others, it is able to eat zfs-diff's output and apply the +changes to the existing database. + +Why did I write it? Indexing, even just "find /big", can take a considerable +amount of time, like an hour or so, with many I/O operations spent. But +my home NAS has a relatively small number of changes made every day. The +only possible quick way to determine what exactly was modified is to +traverse ZFS's Merkle trees to find the difference between +snapshots. Fortunately, the zfs-diff command does exactly that, providing +pretty machine-friendly output. + +Why is this utility so complicated? Initially it kept the whole database in +memory, but that takes 2-3 GiB of memory, which is a huge amount. Moreover, +it had to load it fully to perform any basic search. So the current +implementation uses temporary files and makes heavy use of data streaming. + +Its storage format is trivial: + +* 16-bit BE size of the following name +* entity (file, directory, symbolic link, etc.) name itself. + A directory has a trailing "/" +* single byte indicating current file's depth +* 64-bit BE mtime seconds +* 64-bit BE file or directory (sum of all files and directories) size + +Its indexing algorithm is the following: + +* traverse the whole filesystem hierarchy in *sorted* order. 
All + records are written to a temporary file, without directory sizes, + because they are not known in advance during the walk +* during the walk, remember in memory each directory's total size +* read all records from that temporary file, writing to another one, but + replacing directory sizes with the remembered ones + +Searching is trivial: + +* there is no actual searching, just streaming through the whole + database file sequentially +* if some root is specified, then the program will output only the + hierarchy under it, exiting after it is finished + +The updating algorithm is the following: + +* read all [-+MR] actions from zfs-diff, validating the whole format +* each file's "R" becomes "-" and "+" actions +* if there are directory "R"s, then collect them and stream the current + database to determine each path entity that has to be "-" and "+" +* each "+" adds an entry to the list of "M"s +* sort all "-", "+" and "M" filenames in ascending order +* get the entity's information for each "M" (remembering its size and mtime) +* stream the current database records, writing them to a temporary file +* if a record exists in the "-"-list, then skip it +* if any "+" exists in the *sorted* list that sorts before the + record from the database, then insert it into the stream, taking size and + mtime information from the "M"-list +* if any "M" exists for the read record, then use it to alter it +* all that time, the directory size calculating algorithm also runs, the + same one used during indexing +* create another temporary file to copy the records with up-to-date + directory sizes + +How to use it? 
+ + $ zfs snap big@snap1 + $ cd /big ; glocate -db /tmp/glocate.db -index + + $ glocate -db /tmp/glocate.db + [list of all files] + + $ glocate -db /tmp/glocate.db -machine + [machine parseable list of files with sizes and mtimes] + + $ glocate -db /tmp/glocate.db -tree + [beauty tree-like list of files with sizes and mtimes] + + $ glocate -db /tmp/glocate.db some/sub/path + [just a part of the whole hierarchy] + +and update it carefully: + + $ zfs snap big@snap2 + $ zfs diff -FH big@snap2 | glocate -db /tmp/glocate.db -strip /big/ -update + glocate is copylefted free software: see the file COPYING for copying conditions. diff --git a/diff.go b/diff.go new file mode 100644 index 0000000..61ba1ac --- /dev/null +++ b/diff.go @@ -0,0 +1,254 @@ +package main + +import ( + "bufio" + "io" + "log" + "os" + "sort" + "strings" +) + +type Ren struct { + src []string + dst []string +} + +type BySrc []Ren + +func (a BySrc) Len() int { + return len(a) +} + +func (a BySrc) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a BySrc) Less(i, j int) bool { + return namesCmp(a[i].src, a[j].src) < 0 +} + +type EntByName []*Ent + +func (a EntByName) Len() int { + return len(a) +} + +func (a EntByName) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a EntByName) Less(i, j int) bool { + return namesCmp(a[i].name, a[j].name) < 0 +} + +func updateWithDiff(dbPath, strip string) *os.File { + scanner := bufio.NewScanner(os.Stdin) + var t string + var delsNames []string + var addsNames []string + var modsNames []string + var rens []Ren + var isDir bool + for scanner.Scan() { + t = scanner.Text() + if len(t) == 0 { + continue + } + cols := strings.Split(t, "\t") + if len(cols) < 3 { + log.Fatalln("bad zfs-diff format") + } + isDir = cols[1] == "/" + name := deoctalize(strings.TrimPrefix(cols[2], strip)) + if name == "" { + continue + } + name = "./" + name + if isDir { + name += "/" + } + switch cols[0] { + case "-": + delsNames = append(delsNames, name) + case "+": + addsNames = 
append(addsNames, name) + case "M": + modsNames = append(modsNames, name) + case "R": + if len(cols) != 4 { + log.Fatalln("bad zfs-diff format for R") + } + dst := "./" + deoctalize(strings.TrimPrefix(cols[3], strip)) + if isDir { + dst += "/" + rens = append(rens, Ren{ + src: nameSplit(name), + dst: nameSplit(dst), + }) + } else { + delsNames = append(delsNames, name) + addsNames = append(addsNames, dst) + } + default: + log.Fatalln("bad zfs-diff format") + } + } + + entsReader := make(chan Ent, 1<<10) + db, err := os.Open(dbPath) + if err != nil { + log.Fatalln(err) + } + dels := make([][]string, 0, len(delsNames)+len(rens)) + adds := make([][]string, 0, len(addsNames)+len(rens)) + mods := make([]*Ent, 0, len(modsNames)+len(rens)) + if len(rens) > 0 { + sort.Sort(BySrc(rens)) + go reader(bufio.NewReaderSize(db, 1<<17), entsReader) + var ent Ent + var ok, met bool + for { + ent, ok = <-entsReader + if !ok { + break + } + Retry: + if len(rens) > 0 { + if hasPrefix(ent.name, rens[0].src) { + dels = append(dels, ent.name) + dst := append( + append([]string{}, rens[0].dst...), + ent.name[len(rens[0].src):]..., + ) + adds = append(adds, dst) + mods = append(mods, &Ent{name: dst}) + if !met { + // strip "/" from prefix directory + dst := rens[0].dst + last := dst[len(dst)-1] + dst[len(dst)-1] = last[:len(last)-1] + met = true + } + } else if met { + met = false + rens = rens[1:] + goto Retry + } + } + } + rens = nil + } + + for _, name := range delsNames { + dels = append(dels, nameSplit(name)) + } + delsNames = nil + sort.Sort(ByName(dels)) + + for _, name := range addsNames { + adds = append(adds, nameSplit(name)) + modsNames = append(modsNames, name) + } + addsNames = nil + sort.Sort(ByName(adds)) + + for _, name := range modsNames { + mods = append(mods, &Ent{name: nameSplit(name)}) + } + modsNames = nil + sort.Sort(EntByName(mods)) + var info os.FileInfo + for _, ent := range mods { + info, err = os.Stat(nameJoin(ent.name)) + if err != nil { + log.Println("can not 
stat:", nameJoin(ent.name), ":", err) + continue + } + if info.Mode().IsRegular() { + ent.size = info.Size() + } + ent.mtime = info.ModTime().Unix() + } + + _, err = db.Seek(0, io.SeekStart) + if err != nil { + log.Fatalln(err) + } + tmp0, err := os.CreateTemp("", "glocate-idx") + if err != nil { + log.Fatalln(err) + } + defer os.Remove(tmp0.Name()) + entsReader = make(chan Ent, 1<<10) + entsDirSizer := make(chan Ent, 1<<10) + entsWriter := make(chan Ent, 1<<10) + go reader(bufio.NewReaderSize(db, 1<<17), entsReader) + + dirSizerJob := make(chan struct{}) + var dirSizes []int64 + sinkBack := make(chan Ent, 1) + go func() { + dirSizer(&dirSizes, 1, sinkBack, entsDirSizer, entsWriter) + close(dirSizerJob) + }() + + bw := bufio.NewWriterSize(tmp0, 1<<17) + writerJob := make(chan struct{}) + go func() { + writer(bw, entsWriter) + close(writerJob) + }() + + for ent := range entsReader { + if len(dels) > 0 && namesCmp(ent.name, dels[0]) == 0 { + dels = dels[1:] + continue + } + for len(adds) > 0 && namesCmp(adds[0], ent.name) < 0 { + if namesCmp(mods[0].name, adds[0]) != 0 { + panic("+ and M lists are out of sync") + } + newEnt := Ent{ + name: adds[0], + mtime: mods[0].mtime, + size: mods[0].size, + } + entsDirSizer <- newEnt + adds = adds[1:] + mods = mods[1:] + } + if len(mods) > 0 && namesCmp(ent.name, mods[0].name) == 0 { + ent.mtime = mods[0].mtime + ent.size = mods[0].size + mods = mods[1:] + } + entsDirSizer <- ent + } + for len(adds) > 0 { + if namesCmp(mods[0].name, adds[0]) != 0 { + panic("+ and M lists are out of sync") + } + newEnt := Ent{ + name: adds[0], + mtime: mods[0].mtime, + size: mods[0].size, + } + entsDirSizer <- newEnt + adds = adds[1:] + mods = mods[1:] + } + + close(entsDirSizer) + <-dirSizerJob + close(entsWriter) + <-writerJob + if err = bw.Flush(); err != nil { + log.Fatalln(err) + } + + tmp1 := applyDirSizes(tmp0, dirSizes) + tmp0.Close() + os.Remove(tmp0.Name()) + return tmp1 +} diff --git a/dirsizer.go b/dirsizer.go new file mode 100644 
index 0000000..b4f5b4e --- /dev/null +++ b/dirsizer.go @@ -0,0 +1,94 @@ +package main + +import ( + "bufio" + "encoding/binary" + "io" + "log" + "os" +) + +func dirSizer(dirSizes *[]int64, depth int, sinkBack, sinkIn, sinkOut chan Ent) (curSize int64) { + var ent Ent + var opened bool + var dirIdx int + for { + select { + case ent = <-sinkBack: + goto Got + default: + } + ent, opened = <-sinkIn + if !opened { + break + } + Got: + if len(ent.name) < depth { + sinkBack <- ent + return + } + sinkOut <- ent + curSize += ent.size + if !ent.IsDir() { + continue + } + dirIdx = len(*dirSizes) + (*dirSizes) = append(*dirSizes, 0) + dirSize := dirSizer(dirSizes, depth+1, sinkBack, sinkIn, sinkOut) + (*dirSizes)[dirIdx] = dirSize + curSize += dirSize + } + return +} + +func applyDirSizes(src *os.File, dirSizes []int64) *os.File { + _, err := src.Seek(0, io.SeekStart) + if err != nil { + log.Fatalln(err) + } + tmp, err := os.CreateTemp("", "glocate-idx") + if err != nil { + log.Fatalln(err) + } + + br := bufio.NewReaderSize(src, 1<<17) + num := make([]byte, 8) + var nameLen int + name := make([]byte, 0, 1<<16) + bw := bufio.NewWriterSize(tmp, 1<<17) + var dirIdx int + for { + if _, err = io.ReadFull(br, num[:2]); err != nil { + if err == io.EOF { + break + } + log.Fatalln(err) + } + mustWrite(bw, num[:2]) + nameLen = int(binary.BigEndian.Uint16(num[:2])) + name = name[:nameLen] + if _, err = io.ReadFull(br, name); err != nil { + log.Fatalln(err) + } + mustWrite(bw, name) + if _, err = io.CopyN(bw, br, 1+8); err != nil { + log.Fatalln(err) + } + if name[len(name)-1] == byte('/') { + if _, err = br.Discard(8); err != nil { + log.Fatalln(err) + } + binary.BigEndian.PutUint64(num, uint64(dirSizes[dirIdx])) + mustWrite(bw, num) + dirIdx++ + } else { + if _, err = io.CopyN(bw, br, 8); err != nil { + log.Fatalln(err) + } + } + } + if err = bw.Flush(); err != nil { + log.Fatalln(err) + } + return tmp +} diff --git a/go.mod b/go.mod index 21a56ac..f8654fc 100644 --- a/go.mod +++ 
b/go.mod @@ -2,7 +2,4 @@ module go.stargrave.org/glocate go 1.18 -require ( - github.com/dustin/go-humanize v1.0.0 - github.com/klauspost/compress v1.15.8 -) +require github.com/dustin/go-humanize v1.0.0 diff --git a/go.sum b/go.sum index b9239e0..4f89ea4 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,2 @@ github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/klauspost/compress v1.15.8 h1:JahtItbkWjf2jzm/T+qgMxkP9EMHsqEUA6vCMGmXvhA= -github.com/klauspost/compress v1.15.8/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= diff --git a/index.go b/index.go new file mode 100644 index 0000000..c6fa539 --- /dev/null +++ b/index.go @@ -0,0 +1,55 @@ +package main + +import ( + "bufio" + "fmt" + "log" + "os" +) + +func index() *os.File { + tmp0, err := os.CreateTemp("", "glocate-idx") + if err != nil { + log.Fatalln(err) + } + defer os.Remove(tmp0.Name()) + + entsWalker := make(chan Ent, 1<<10) + entsWriter := make(chan Ent, 1<<10) + dirSizerJob := make(chan struct{}) + var dirSizes []int64 + entsWalker <- Ent{name: []string{"./"}} + sinkBack := make(chan Ent, 1) + go func() { + dirSizer(&dirSizes, 1, sinkBack, entsWalker, entsWriter) + close(dirSizerJob) + }() + + bw := bufio.NewWriterSize(tmp0, 1<<17) + writerJob := make(chan struct{}) + go func() { + writer(bw, entsWriter) + close(writerJob) + }() + + walkerStatusStop := make(chan struct{}) + go walkerStatus(walkerStatusStop) + err = walker(entsWalker, []string{"./"}) + walkerStatusStop <- struct{}{} + <-walkerStatusStop + fmt.Print("\r") + if err != nil { + log.Fatalln(err) + } + close(entsWalker) + <-dirSizerJob + close(entsWriter) + <-writerJob + if err = bw.Flush(); err != nil { + log.Fatalln(err) + } + + tmp1 := applyDirSizes(tmp0, dirSizes) + tmp0.Close() + return tmp1 +} diff --git a/main.go b/main.go index d6abfc4..a5fcb41 100644 --- a/main.go +++ b/main.go @@ -1,429 +1,98 @@ -/* 
-glocate -- ZFS-diff-friendly locate-like utility -Copyright (C) 2022 Sergey Matveev - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - package main import ( "bufio" - "encoding/gob" "flag" - "fmt" - "io" - "io/fs" "log" "os" - "path" - "sort" - "strconv" "strings" "syscall" - "time" - - "github.com/dustin/go-humanize" - "github.com/klauspost/compress/zstd" ) -type File struct { - Name string - Size uint64 - Mtime int64 - Files []File +type Ent struct { + name []string + mtime int64 + size int64 } -type ByName []File - -func (a ByName) Len() int { - return len(a) -} - -func (a ByName) Swap(i, j int) { - a[i], a[j] = a[j], a[i] -} - -func (a ByName) Less(i, j int) bool { - return a[i].Name < a[j].Name -} - -func (file *File) IsDir() bool { - return file.Name[len(file.Name)-1] == '/' -} - -func walk(root string) ([]File, uint64, error) { - fd, err := os.Open(root) - if err != nil { - return nil, 0, err - } - var files []File - var size uint64 - var info fs.FileInfo - for { - ents, err := fd.ReadDir(1 << 10) - if err != nil { - if err == io.EOF { - break - } - fd.Close() - return nil, 0, err - } - for _, ent := range ents { - file := File{Name: ent.Name()} - fullPath := path.Join(root, file.Name) - if ent.IsDir() { - file.Name += "/" - } - info, err = ent.Info() - if err != nil { - log.Println("can not stat:", fullPath, ":", err) - files = append(files, file) - continue - } - file.Mtime = info.ModTime().Unix() - if ent.IsDir() { - file.Files, file.Size, err = walk(fullPath) - 
if err != nil { - log.Println("can not walk:", fullPath, ":", err) - files = append(files, file) - continue - } - } else if info.Mode().IsRegular() { - file.Size = uint64(info.Size()) - } - files = append(files, file) - size += file.Size - } - } - fd.Close() - sort.Sort(ByName(files)) - return files, size, nil -} - -func usage() { - log.Println("usage") - os.Exit(1) -} - -func load(dbPath string) *File { - fd, err := os.Open(dbPath) - if err != nil { - log.Fatalln(err) - } - defer fd.Close() - comp, err := zstd.NewReader(fd) - if err != nil { - log.Fatalln(err) - } - dec := gob.NewDecoder(comp) - var file File - err = dec.Decode(&file) - if err != nil { - log.Fatalln(err) - } - comp.Close() - return &file +func (ent *Ent) IsDir() bool { + return IsDir(ent.name[len(ent.name)-1]) } -func (db *File) dump(dbPath string) error { - tmp, err := os.CreateTemp(path.Dir(dbPath), "glocate") - if err != nil { - return err - } - defer os.Remove(tmp.Name()) - comp, err := zstd.NewWriter( - tmp, zstd.WithEncoderLevel(zstd.SpeedBestCompression), - ) - if err != nil { - return err - } - enc := gob.NewEncoder(comp) - err = enc.Encode(db) - if err != nil { - return err - } - err = comp.Close() - if err != nil { - return err - } - err = tmp.Close() - if err != nil { - return err - } +func dbCommit(dbPath string, tmp *os.File) { umask := syscall.Umask(0) syscall.Umask(umask) - err = os.Chmod(tmp.Name(), os.FileMode(0666&^umask)) - if err != nil { - return err - } - return os.Rename(tmp.Name(), dbPath) -} - -func (file *File) listBeauty(indent string, n int, isLast, veryFirst bool) { - if veryFirst { - fmt.Printf("[%s]\n", humanize.IBytes(file.Size)) - } else { - var box string - if isLast { - box = "└" - } else { - box = "├" - } - name := file.Name - fmt.Printf("%s%s %s\t№%d [%s] %s\n", - indent, box, name, n, humanize.IBytes(file.Size), - time.Unix(file.Mtime, 0).Format("2006-01-02"), - ) - if isLast { - indent += " " - } else { - indent += "│ " - } - } - for n, f := range file.Files 
{ - n++ - f.listBeauty(indent, n, n == len(file.Files), false) - } -} - -func (file *File) listSimple(root string, veryFirst bool) { - name := file.Name - fmt.Println( - strconv.FormatUint(file.Size, 10), - time.Unix(file.Mtime, 0).Format("2006-01-02T15:04:05"), - root+name, - ) - if veryFirst { - name = "" - } - for _, f := range file.Files { - f.listSimple(root+name, false) - } -} - -func (file *File) listFiles(root string, veryFirst bool) { - name := file.Name - if veryFirst { - root = "" - } else { - fmt.Println(root + name) - root += name - } - for _, f := range file.Files { - f.listFiles(root, false) - } -} - -func (db *File) find(p string) (file *File, parents []*File, idx int, err error) { - file = db - var f File -Entities: - for _, ent := range strings.Split(p, "/") { - for idx, f = range file.Files { - if (ent == f.Name) || (ent+"/" == f.Name) { - parents = append(parents, file) - file = &f - continue Entities - } - } - err = fmt.Errorf("no entity found: %s", ent) - return - } - return -} - -func (db *File) remove(p string) error { - file, parents, idx, err := db.find(p) - if err != nil { - return err - } - lastParent := parents[len(parents)-1] - lastParent.Files = append( - lastParent.Files[:idx], - lastParent.Files[idx+1:]..., - ) - for _, parent := range parents { - parent.Size -= file.Size - } - return nil -} - -func (db *File) add(p string) error { - cols := strings.Split(p, "/") - cols, name := cols[:len(cols)-1], cols[len(cols)-1] - var parent *File - var err error - if len(cols) != 0 { - parent, _, _, err = db.find(path.Join(cols...)) - if err != nil { - return err - } - } else { - parent = db - } - info, err := os.Stat(p) - if err != nil { - return err - } - if info.IsDir() { - name += "/" - } - file := File{ - Name: name, - Size: uint64(info.Size()), - Mtime: info.ModTime().Unix(), + if err := os.Chmod(tmp.Name(), os.FileMode(0666&^umask)); err != nil { + log.Fatalln(err) } - parent.Files = append(parent.Files, file) - 
sort.Sort(ByName(parent.Files)) - parent.Size += file.Size - return nil -} - -func deoctalize(s string) string { - chars := make([]byte, 0, len(s)) - for i := 0; i < len(s); i++ { - if s[i] == '\\' { - b, err := strconv.ParseUint("0"+s[i+1:i+1+3], 0, 8) - if err != nil { - log.Fatalln(err) - } - chars = append(chars, byte(b)) - i += 3 - } else { - chars = append(chars, s[i]) - } + if err := os.Rename(tmp.Name(), dbPath); err != nil { + log.Fatalln(err) } - return string(chars) } func main() { - dbPath := flag.String("db", ".glocate.db", "Path to state file (database)") - doIndex := flag.Bool("index", false, "Initialize database") - doUpdate := flag.Bool("update", false, "Update database by zfs-diff's output") - showBeauty := flag.Bool("show-beauty", false, "Show beauty human-friendly listing") - showSimple := flag.Bool("show-simple", false, "Show simple listing") - stripPrefix := flag.String("strip-prefix", "", "Strip prefix from zfs-diff's output") + dbPath := flag.String("db", "glocate.db", "Path to database") + doIndex := flag.Bool("index", false, "Perform indexing") + doUpdate := flag.Bool("update", false, "Feed zfs-diff and update the database") + strip := flag.String("strip", "", "Strip prefix from zfs-diff's paths") + showMachine := flag.Bool("machine", false, "Show machine friendly") + showTree := flag.Bool("tree", false, "Show human-friendly tree") + dryRun := flag.Bool("n", false, "Dry run, do not overwrite database") flag.Parse() log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds | log.Lshortfile) if *doIndex { - files, size, err := walk(".") - if err != nil { - log.Fatalln(err) - } - db := File{Name: "./", Size: size, Files: files} - err = db.dump(*dbPath) - if err != nil { - log.Fatalln(err) + tmp := index() + tmp.Close() + if !*dryRun { + dbCommit(*dbPath, tmp) } return } - db := load(*dbPath) if *doUpdate { - scanner := bufio.NewScanner(os.Stdin) - var t string - for scanner.Scan() { - t = scanner.Text() - if len(t) == 0 { - continue - } - cols 
:= strings.Split(t, "\t") - if len(cols) < 2 { - log.Fatalln("bad zfs-diff format") - } - switch cols[0] { - case "-": - name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix)) - if err := db.remove(name); err != nil { - log.Println("can not -:", name, ":", err) - } - case "+": - name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix)) - if err := db.add(name); err != nil { - log.Println("can not +:", name, ":", err) - } - case "M": - name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix)) - if name == "" { - continue - } - file, _, _, err := db.find(name) - if err != nil { - log.Println("can not M:", name, ":", err) - continue - } - info, err := os.Stat(name) - if err != nil { - log.Println("can not M:", name, ":", err) - continue - } - if info.Mode().IsRegular() { - file.Size = uint64(info.Size()) - } - file.Mtime = info.ModTime().Unix() - case "R": - if len(cols) != 3 { - log.Fatalln("bad zfs-diff format for R") - } - name := deoctalize(strings.TrimPrefix(cols[1], *stripPrefix)) - if err := db.remove(name); err != nil { - log.Println("can not R-:", name, ":", err) - continue - } - name = deoctalize(strings.TrimPrefix(cols[2], *stripPrefix)) - if err := db.add(name); err != nil { - log.Println("can not R+:", name, ":", err) - } - default: - log.Fatalln("bad zfs-diff format") - } - } - if err := scanner.Err(); err != nil { - log.Fatalln(err) - } - if err := db.dump(*dbPath); err != nil { - log.Fatalln(err) + tmp := updateWithDiff(*dbPath, *strip) + tmp.Close() + if !*dryRun { + dbCommit(*dbPath, tmp) } return } - veryFirst := true - if len(flag.Args()) > 0 { - root := flag.Args()[0] - if root[:2] == "./" { - root = root[2:] - } - if root[len(root)-1:] == "/" { - root = root[:len(root)-1] - } - file, _, _, err := db.find(root) - if err != nil { - log.Fatalln(err) - } - db = file - db.Name = root + "/" - veryFirst = false + db, err := os.Open(*dbPath) + if err != nil { + log.Fatalln(err) } + entsReader := make(chan Ent, 1<<10) + go 
reader(bufio.NewReaderSize(db, 1<<17), entsReader) - if *showBeauty { - db.listBeauty("", 0, false, veryFirst) - return + entsPrinter := make(chan Ent, 1<<10) + printerJob := make(chan struct{}) + go func() { + if *showMachine { + printerMachine(entsPrinter) + } else if *showTree { + printerTree(entsPrinter) + } else { + printerSimple(entsPrinter) + } + close(printerJob) + }() + + var root []string + if len(flag.Args()) > 0 { + root = strings.Split("./"+flag.Arg(0), "/") } - if *showSimple { - db.listSimple("", veryFirst) - return + + rootMet := false + for ent := range entsReader { + if hasPrefix(ent.name, root) { + entsPrinter <- ent + rootMet = true + } else if rootMet { + break + } } - db.listFiles("", veryFirst) + close(entsPrinter) + <-printerJob } diff --git a/names.go b/names.go new file mode 100644 index 0000000..ad60435 --- /dev/null +++ b/names.go @@ -0,0 +1,92 @@ +package main + +import ( + "log" + "path" + "strconv" + "strings" +) + +func IsDir(s string) bool { + return s[len(s)-1] == '/' +} + +type ByName [][]string + +func (a ByName) Len() int { + return len(a) +} + +func (a ByName) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a ByName) Less(i, j int) bool { + return namesCmp(a[i], a[j]) < 0 +} + +func nameSplit(name string) []string { + cols := strings.Split(name, "/") + if IsDir(name) { + cols = cols[:len(cols)-1] + cols[len(cols)-1] += "/" + } + return cols +} + +func nameJoin(name []string) (s string) { + s = path.Join(name...) 
+ if IsDir(name[len(name)-1]) { + s += "/" + } + return +} + +func namesCmp(n1, n2 []string) int { + min := len(n1) + if len(n2) < min { + min = len(n2) + } + var t1, t2 string + for i := 0; i < min; i++ { + t1 = strings.TrimSuffix(n1[i], "/") + t2 = strings.TrimSuffix(n2[i], "/") + if t1 < t2 { + return -1 + } + if t1 > t2 { + return +1 + } + } + if len(n1) > len(n2) { + return +1 + } + if len(n1) < len(n2) { + return -1 + } + return 0 +} + +func hasPrefix(name, prefix []string) bool { + if len(name) < len(prefix) { + return false + } + return namesCmp(name[:len(prefix)], prefix) == 0 +} + +func deoctalize(s string) string { + chars := make([]byte, 0, len(s)) + for i := 0; i < len(s); i++ { + if s[i] == '\\' { + b, err := strconv.ParseUint("0"+s[i+1:i+1+3], 0, 8) + if err != nil { + log.Fatalln(err) + } + chars = append(chars, byte(b)) + i += 3 + } else { + chars = append(chars, s[i]) + } + } + return string(chars) +} diff --git a/printers.go b/printers.go new file mode 100644 index 0000000..d080508 --- /dev/null +++ b/printers.go @@ -0,0 +1,72 @@ +package main + +import ( + "fmt" + "strconv" + "strings" + "time" + + "github.com/dustin/go-humanize" +) + +func printerSimple(ents chan Ent) { + for ent := range ents { + fmt.Println(nameJoin(ent.name)) + } +} + +func printerMachine(ents chan Ent) { + for ent := range ents { + fmt.Println( + strconv.FormatUint(uint64(ent.size), 10), + time.Unix(int64(ent.mtime), 0).Format("2006-01-02T15:04:05"), + nameJoin(ent.name), + ) + } +} + +type TreePrintEnt struct { + ent Ent + isLast bool +} + +func laster(ents chan Ent, trees chan TreePrintEnt) { + entPrev := <-ents + for ent := range ents { + tree := TreePrintEnt{ent: entPrev} + if len(ent.name) < len(entPrev.name) { + tree.isLast = true + } + trees <- tree + entPrev = ent + } + trees <- TreePrintEnt{ent: entPrev} + close(trees) +} + +func printerTree(ents chan Ent) { + trees := make(chan TreePrintEnt, 1<<10) + go laster(ents, trees) + first := true + var box string + for 
ent := range trees { + if first { + fmt.Printf( + "%s\t[%s]\n", nameJoin(ent.ent.name), + humanize.IBytes(uint64(ent.ent.size)), + ) + first = false + continue + } + if ent.isLast { + box = "└" + } else { + box = "├" + } + fmt.Printf("%s%s %s\t[%s] %s\n", + strings.Repeat("│ ", len(ent.ent.name)-2), box, + nameJoin(ent.ent.name), humanize.IBytes(uint64(ent.ent.size)), + time.Unix(ent.ent.mtime, 0).Format("2006-01-02"), + ) + } +} diff --git a/reader.go b/reader.go new file mode 100644 index 0000000..269fc32 --- /dev/null +++ b/reader.go @@ -0,0 +1,51 @@ +package main + +import ( + "encoding/binary" + "io" + "log" +) + +func mustReadFull(r io.Reader, buf []byte) { + if _, err := io.ReadFull(r, buf); err != nil { + log.Fatalln(err) + } +} + +func reader(r io.Reader, sink chan Ent) { + var err error + num := make([]byte, 8) + var cols []string + var namePrev string + var nameLen uint16 + var depth, depthPrev uint8 + for { + _, err = io.ReadFull(r, num[:2]) + if err != nil { + if err == io.EOF { + break + } + log.Fatalln(err) + } + nameLen = binary.BigEndian.Uint16(num[:2]) + nameRaw := make([]byte, nameLen) + mustReadFull(r, nameRaw) + name := string(nameRaw) + mustReadFull(r, num[:1]) + depth = uint8(num[0]) + mustReadFull(r, num) + ent := Ent{mtime: int64(binary.BigEndian.Uint64(num))} + mustReadFull(r, num) + ent.size = int64(binary.BigEndian.Uint64(num)) + if depth > depthPrev { + cols = append(cols, namePrev[:len(namePrev)-1]) + } else if depth < depthPrev { + cols = cols[:len(cols)-int(depthPrev-depth)] + } + ent.name = append([]string{}, append(cols, name)...) 
+ sink <- ent + namePrev = name + depthPrev = depth + } + close(sink) +} diff --git a/status.go b/status.go new file mode 100644 index 0000000..342c3ba --- /dev/null +++ b/status.go @@ -0,0 +1,19 @@ +package main + +import ( + "fmt" + "time" +) + +func walkerStatus(stop chan struct{}) { + tick := time.Tick(time.Second) + for { + fmt.Printf("\r%d files %d directories", WalkerFiles, WalkerDirs) + select { + case <-tick: + case <-stop: + close(stop) + return + } + } +} diff --git a/walker.go b/walker.go new file mode 100644 index 0000000..4a91de7 --- /dev/null +++ b/walker.go @@ -0,0 +1,52 @@ +package main + +import ( + "io/fs" + "log" + "os" + "path" +) + +var ( + WalkerFiles int64 + WalkerDirs int64 +) + +func walker(sink chan Ent, root []string) error { + files, err := os.ReadDir(path.Join(root...)) // it is already sorted + if err != nil { + return err + } + var info fs.FileInfo + ents := make([]Ent, 0, len(files)) + for _, file := range files { + ent := Ent{name: append([]string{}, append(root, file.Name())...)} + info, err = file.Info() + if err == nil { + if info.IsDir() { + ent.name[len(ent.name)-1] += "/" + } else if info.Mode().IsRegular() { + ent.size = info.Size() + } + ent.mtime = info.ModTime().Unix() + } else { + log.Println("can not stat:", path.Join(ent.name...), ":", err) + } + ents = append(ents, ent) + } + for _, ent := range ents { + sink <- ent + if ent.IsDir() { + WalkerDirs++ + } else { + WalkerFiles++ + continue + } + err = walker(sink, ent.name) + if err != nil { + log.Println("can not stat:", path.Join(ent.name...), ":", err) + continue + } + } + return nil +} diff --git a/writer.go b/writer.go new file mode 100644 index 0000000..e12a19c --- /dev/null +++ b/writer.go @@ -0,0 +1,35 @@ +package main + +import ( + "encoding/binary" + "io" + "log" +) + +func mustWrite(w io.Writer, buf []byte) { + if _, err := w.Write(buf); err != nil { + log.Fatalln(err) + } +} + +func writer(w io.Writer, sink chan Ent) { + num := make([]byte, 8) + var name 
string + for ent := range sink { + name = ent.name[len(ent.name)-1] + if len(ent.name) >= 1<<16 { + panic("too long") + } + binary.BigEndian.PutUint16(num[:2], uint16(len(name))) + mustWrite(w, num[:2]) + mustWrite(w, []byte(name)) + if len(ent.name) >= 1<<8 { + panic("too deep") + } + mustWrite(w, []byte{byte(len(ent.name) - 1)}) + binary.BigEndian.PutUint64(num, uint64(ent.mtime)) + mustWrite(w, num) + binary.BigEndian.PutUint64(num, uint64(ent.size)) + mustWrite(w, num) + } +}