X-Git-Url: http://www.git.stargrave.org/?p=glocate.git;a=blobdiff_plain;f=diff.go;fp=diff.go;h=61ba1ac9f83e9f80cb3ac2a7b81e88079e1e9a2d3a8e8afabd34f0485b739d9f;hp=0000000000000000000000000000000000000000000000000000000000000000;hb=d5b8c235a1f3088c6c1e7261df3d1b565d042db2ba2ad1bbd1018782b9178e1f;hpb=411a031ec7cc707b8269acc3dfe28bc8db1bab5a9a91781c26809ae9853c6f6a diff --git a/diff.go b/diff.go new file mode 100644 index 0000000..61ba1ac --- /dev/null +++ b/diff.go @@ -0,0 +1,254 @@ +package main + +import ( + "bufio" + "io" + "log" + "os" + "sort" + "strings" +) + +type Ren struct { + src []string + dst []string +} + +type BySrc []Ren + +func (a BySrc) Len() int { + return len(a) +} + +func (a BySrc) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a BySrc) Less(i, j int) bool { + return namesCmp(a[i].src, a[j].src) < 0 +} + +type EntByName []*Ent + +func (a EntByName) Len() int { + return len(a) +} + +func (a EntByName) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a EntByName) Less(i, j int) bool { + return namesCmp(a[i].name, a[j].name) < 0 +} + +func updateWithDiff(dbPath, strip string) *os.File { + scanner := bufio.NewScanner(os.Stdin) + var t string + var delsNames []string + var addsNames []string + var modsNames []string + var rens []Ren + var isDir bool + for scanner.Scan() { + t = scanner.Text() + if len(t) == 0 { + continue + } + cols := strings.Split(t, "\t") + if len(cols) < 3 { + log.Fatalln("bad zfs-diff format") + } + isDir = cols[1] == "/" + name := deoctalize(strings.TrimPrefix(cols[2], strip)) + if name == "" { + continue + } + name = "./" + name + if isDir { + name += "/" + } + switch cols[0] { + case "-": + delsNames = append(delsNames, name) + case "+": + addsNames = append(addsNames, name) + case "M": + modsNames = append(modsNames, name) + case "R": + if len(cols) != 4 { + log.Fatalln("bad zfs-diff format for R") + } + dst := "./" + deoctalize(strings.TrimPrefix(cols[3], strip)) + if isDir { + dst += "/" + rens = append(rens, Ren{ + src: nameSplit(name), + dst: nameSplit(dst), + }) + } else { + delsNames = append(delsNames, name) + addsNames = append(addsNames, dst) + } + default: + log.Fatalln("bad zfs-diff format") + } + } + + entsReader := make(chan Ent, 1<<10) + db, err := os.Open(dbPath) + if err != nil { + log.Fatalln(err) + } + dels := make([][]string, 0, len(delsNames)+len(rens)) + adds := make([][]string, 0, len(addsNames)+len(rens)) + mods := make([]*Ent, 0, len(modsNames)+len(rens)) + if len(rens) > 0 { + sort.Sort(BySrc(rens)) + go reader(bufio.NewReaderSize(db, 1<<17), entsReader) + var ent Ent + var ok, met bool + for { + ent, ok = <-entsReader + if !ok { + break + } + Retry: + if len(rens) > 0 { + if hasPrefix(ent.name, rens[0].src) { + dels = append(dels, ent.name) + dst := append( + append([]string{}, rens[0].dst...), + ent.name[len(rens[0].src):]..., + ) + adds = append(adds, dst) + mods = append(mods, &Ent{name: dst}) + if !met { + // strip "/" from prefix directory + dst := rens[0].dst + last := dst[len(dst)-1] + dst[len(dst)-1] = last[:len(last)-1] + met = true + } + } else if met { + met = false + rens = rens[1:] + goto Retry + } + } + } + rens = nil + } + + for _, name := range delsNames { + dels = append(dels, nameSplit(name)) + } + delsNames = nil + sort.Sort(ByName(dels)) + + for _, name := range addsNames { + adds = append(adds, nameSplit(name)) + modsNames = append(modsNames, name) + } + addsNames = nil + sort.Sort(ByName(adds)) + + for _, name := range modsNames { + mods = append(mods, &Ent{name: nameSplit(name)}) + } + modsNames = nil + sort.Sort(EntByName(mods)) + var info os.FileInfo + for _, ent := range mods { + info, err = os.Stat(nameJoin(ent.name)) + if err != nil { + log.Println("can not stat:", nameJoin(ent.name), ":", err) + continue + } + if info.Mode().IsRegular() { + ent.size = info.Size() + } + ent.mtime = info.ModTime().Unix() + } + + _, err = db.Seek(0, io.SeekStart) + if err != nil { + log.Fatalln(err) + } + tmp0, err := os.CreateTemp("", "glocate-idx") + if err != nil { + log.Fatalln(err) + } + defer os.Remove(tmp0.Name()) + entsReader = make(chan Ent, 1<<10) + entsDirSizer := make(chan Ent, 1<<10) + entsWriter := make(chan Ent, 1<<10) + go reader(bufio.NewReaderSize(db, 1<<17), entsReader) + + dirSizerJob := make(chan struct{}) + var dirSizes []int64 + sinkBack := make(chan Ent, 1) + go func() { + dirSizer(&dirSizes, 1, sinkBack, entsDirSizer, entsWriter) + close(dirSizerJob) + }() + + bw := bufio.NewWriterSize(tmp0, 1<<17) + writerJob := make(chan struct{}) + go func() { + writer(bw, entsWriter) + close(writerJob) + }() + + for ent := range entsReader { + if len(dels) > 0 && namesCmp(ent.name, dels[0]) == 0 { + dels = dels[1:] + continue + } + for len(adds) > 0 && namesCmp(adds[0], ent.name) < 0 { + if namesCmp(mods[0].name, adds[0]) != 0 { + panic("+ and M lists are out of sync") + } + newEnt := Ent{ + name: adds[0], + mtime: mods[0].mtime, + size: mods[0].size, + } + entsDirSizer <- newEnt + adds = adds[1:] + mods = mods[1:] + } + if len(mods) > 0 && namesCmp(ent.name, mods[0].name) == 0 { + ent.mtime = mods[0].mtime + ent.size = mods[0].size + mods = mods[1:] + } + entsDirSizer <- ent + } + for len(adds) > 0 { + if namesCmp(mods[0].name, adds[0]) != 0 { + panic("+ and M lists are out of sync") + } + newEnt := Ent{ + name: adds[0], + mtime: mods[0].mtime, + size: mods[0].size, + } + entsDirSizer <- newEnt + adds = adds[1:] + mods = mods[1:] + } + + close(entsDirSizer) + <-dirSizerJob + close(entsWriter) + <-writerJob + if err = bw.Flush(); err != nil { + log.Fatalln(err) + } + + tmp1 := applyDirSizes(tmp0, dirSizes) + tmp0.Close() + os.Remove(tmp0.Name()) + return tmp1 +}