dep.rec | 6 ++++-- doc/cmds.texi | 6 ------ doc/faq.texi | 6 +++--- doc/features.texi | 8 +++++--- doc/index.texi | 1 + doc/news.texi | 10 ++++++++++ doc/ood.texi | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ inode.go | 62 +++++++++++++++++++++++++++++++++++++++++++++++------ main.go | 17 +++++++++++++---- makedist.sh | 1 + ood.go | 2 +- usage.go | 8 +++++--- diff --git a/dep.rec b/dep.rec index b60149d4ba722c91445919cd6c9a4b8cc6428ab896fb2340630aa860ea513e26..461ad420ff3f0aed5588d6d5e33e494d610dbc6526323c088f30b4aeac14d8b8 100644 --- a/dep.rec +++ b/dep.rec @@ -6,10 +6,12 @@ %rec: Dependency %doc: Dependency information %mandatory: Type -%allowed: Target Size CtimeSec CtimeNsec Hash -%unique: Type Target Size CtimeSec CtimeNsec Hash +%allowed: Target Size CtimeSec CtimeNsec MtimeSec MtimeNsec Hash +%unique: Type Target Size CtimeSec CtimeNsec MtimeSec MtimeNsec Hash %type: Type enum ifcreate ifchange always stamp %type: Size int %type: CtimeSec int %type: CtimeNsec int +%type: MtimeSec int +%type: MtimeNsec int %type: Hash regexp /[0-9a-f]{64}/ diff --git a/doc/cmds.texi b/doc/cmds.texi index 42e736832895fbad1c0e9255df81d5b92d12df0de1a91e1d6a2a075e533f5acc..f4afdefc13cf837fb70b73f449e6de28dc9df697b839ca9581ada5ce5a82c4b0 100644 --- a/doc/cmds.texi +++ b/doc/cmds.texi @@ -46,12 +46,6 @@ By default all build commands use @code{fsync} to assure data is reached the disk. You can disable its usage with @env{$REDO_NO_SYNC=1} environment variable, for speeding up the build process. -@command{goredo} determines target is out-of-date by comparing its size, -@code{ctime} and content's hash, if @code{ctime} differs. Depending on -the filesystem you use, probably you can not trust its @code{ctime} -value at all. In that case you can set @env{$REDO_INODE_NO_TRUST=1} to -forcefully verify the hash. 
- There are other commands that could be found in other implementations too: @table @command diff --git a/doc/faq.texi b/doc/faq.texi index 5b63153d63c4bfcfcbdcb1f5a83dd61567403b3119b35513f74a6d262a10712f..cce5048be02985f7a7942e38a97870096fdc1b332c946d524160ee4fdb9d00cd 100644 --- a/doc/faq.texi +++ b/doc/faq.texi @@ -4,9 +4,9 @@ @anchor{Stamping} @section Hashing and stamping -All targets are checksummed if target's @file{ctime} differs from the -previous one, or @env{$REDO_INODE_NO_TRUST} environment variable is set. -@command{apenwarr/redo} gives +All targets are checksummed if target's size, @code{ctime}/@code{mtime} +differs from the previous one (depending on @ref{OOD, @env{$REDO_INODE_TRUST}} +value). @command{apenwarr/redo} gives @url{https://redo.readthedocs.io/en/latest/FAQImpl/#why-not-always-use-checksum-based-dependencies-instead-of-timestamps, many reasons} why every time checksumming is bad, but in my opinion in practice all of them do not apply. diff --git a/doc/features.texi b/doc/features.texi index f1d76720a41d94ca22b517125045b6e97e6054e75a8bea742cf3b94ba0f2be4d..57d95b999102ffb8863d16549a40a1413476d1002cc1e71156e31fd0aad7f46f 100644 --- a/doc/features.texi +++ b/doc/features.texi @@ -21,9 +21,11 @@ arguments, start/finish times, duration, participated PIDs, return codes can be saved for later investigation @item targets, dependency information and their directories are explicitly synced (can be disabled, should work faster) -@item file's change is detected by comparing its size, @code{ctime} (if - @env{$REDO_INODE_NO_TRUST} environment variable is not set) and - @url{https://github.com/BLAKE3-team/BLAKE3, BLAKE3} hash +@item file's change is detected by comparing its size, and + @url{https://github.com/BLAKE3-team/BLAKE3, BLAKE3} hash. Also as an + optimization, by default if file's @code{ctime} is same, then hash + check is skipped. 
Optionally you can disable that behaviour, or even + enable trust to file's @code{mtime} @item files creation is @code{umask}-friendly (unlike @code{mkstemp()} used in @command{redo-c}) @item parallel build with jobs limit, optionally in infinite mode diff --git a/doc/index.texi b/doc/index.texi index 89e86593611c7544ebea6314f0e615984d0ec52078f4fb9fc107c7eb5a3fcca5..30c7393c5cfc3ff1ebdf6ba9f08558423ddc22e6d7b2151049f87592b9f82a74 100644 --- a/doc/index.texi +++ b/doc/index.texi @@ -49,6 +49,7 @@ @include features.texi @include notes.texi @include rules.texi @include cmds.texi +@include ood.texi @include logs.texi @include news.texi @include install.texi diff --git a/doc/news.texi b/doc/news.texi index a7fcb4cf9edaba645b01b437edd4d70bf84dffe42c168f50c91716ad1dd639be..4c971fe7fd20ee6d487aa93bbefdfdd819a72871dab8f623188a4e04fd0677cc 100644 --- a/doc/news.texi +++ b/doc/news.texi @@ -1,6 +1,16 @@ @node News @unnumbered News +@anchor{Release 1_21_0} +@section Release 1.21.0 +@itemize +@item + @env{$REDO_INODE_NO_TRUST} is replaced with @env{$REDO_INODE_TRUST} + environment variable, that takes either @code{none}, or @code{ctime} + (the default one), or @code{mtime} (new one). Check documentation's + separate page about that option. +@end itemize + @anchor{Release 1_20_0} @section Release 1.20.0 @itemize diff --git a/doc/ood.texi b/doc/ood.texi new file mode 100644 index 0000000000000000000000000000000000000000..a57cb19c0da5ba7c25b358245c660291b545d4ede0c402c6f2ef5ffc996b0517 --- /dev/null +++ b/doc/ood.texi @@ -0,0 +1,52 @@ +@node OOD +@unnumbered Out-of-date determination + +The main task for build system is deciding if the target is out-of-date +and needs rebuilding. The single most reliable way to do that is to +compare file's content with previously recorded one. But that is too +expensive. + +So direct content storage/comparison can be replaced with +collision-resistant hash function of enough length. 
@command{goredo} +uses @url{https://github.com/BLAKE3-team/BLAKE3, BLAKE3} with 256-bit +output for that purpose. + +It also stores the file's size. Obviously, if the size differs, then the +content differs too and there is no need to read and hash it. + +But hashing can still be relatively expensive. So there are additional +possible checks that can skip the need for hash checking, based on some trust +in the underlying filesystem and operating system behaviour, controlled +by the @env{$REDO_INODE_TRUST} environment variable value: + +@table @env + +@item $REDO_INODE_TRUST=none +Do not trust the filesystem at all, except for the file's size. +This is the most reliable mode. + +@item $REDO_INODE_TRUST=ctime +Trust the @code{ctime} value of the file's inode. It should change every time +the inode is updated. If nothing is touched and @code{ctime} is the same, +then assume that the file was not modified and do not try to read its +content. Unfortunately @code{ctime} also changes if the link count or +ownership is updated, which can give a false positive decision and force +rereading of the file. + +@item $REDO_INODE_TRUST=mtime +Trust the @code{mtime} value of the file's inode. It should change every time +the file's content is updated. But unfortunately there are +@url{https://apenwarr.ca/log/20181113, many reasons} why it won't. + +@end table + +Note that although @code{mtime} is considered harmful (see the link +above) and is hardly acceptable in a build system like Make, which +compares the timestamps of two files, redo only cares about the fact +that it has changed, so badly jumping clocks are not so devastating. Modern +filesystems and operating systems with micro- and nanosecond resolution +timestamps should be a pretty good choice for @env{$REDO_INODE_TRUST=mtime}. +However, GNU/Linux with the @code{ext4} filesystem can easily have a pretty big +granularity of 10ms. + +@command{goredo} uses @env{$REDO_INODE_TRUST=ctime} by default. 
diff --git a/inode.go b/inode.go index fdf730f8a882e0727c0580961b21fa16f382057cf938d78ec2dfc87136a40ca3..046ae8f4ea6d6f74a63e6aa393aef544a47ae16afd1509e12f7dedc1958e8d55 100644 --- a/inode.go +++ b/inode.go @@ -28,20 +28,45 @@ "go.cypherpunks.ru/recfile" "golang.org/x/sys/unix" ) -const EnvInodeNoTrust = "REDO_INODE_NO_TRUST" +type InodeTrustType int -var InodeTrust = false +//go:generate stringer -type=InodeTrustType +const ( + EnvInodeTrust = "REDO_INODE_TRUST" + + InodeTrustNone InodeTrustType = iota + InodeTrustCtime + InodeTrustMtime +) + +var InodeTrust InodeTrustType type Inode struct { Size int64 CtimeSec int64 CtimeNsec int64 + MtimeSec int64 + MtimeNsec int64 } func (our *Inode) Equals(their *Inode) bool { - return (our.Size == their.Size) && - (our.CtimeSec == their.CtimeSec) && - (our.CtimeNsec == their.CtimeNsec) + if our.Size != their.Size { + return false + } + switch InodeTrust { + case InodeTrustCtime: + if our.CtimeSec != their.CtimeSec || our.CtimeNsec != their.CtimeNsec { + return false + } + case InodeTrustMtime: + if our.MtimeSec == 0 || our.MtimeNsec == 0 { + return false + } + if our.MtimeSec != their.MtimeSec || our.MtimeNsec != their.MtimeNsec { + return false + } + } + return true } func (inode *Inode) RecfileFields() []recfile.Field { @@ -49,6 +74,8 @@ return []recfile.Field{ {Name: "Size", Value: strconv.FormatInt(inode.Size, 10)}, {Name: "CtimeSec", Value: strconv.FormatInt(inode.CtimeSec, 10)}, {Name: "CtimeNsec", Value: strconv.FormatInt(inode.CtimeNsec, 10)}, + {Name: "MtimeSec", Value: strconv.FormatInt(inode.MtimeSec, 10)}, + {Name: "MtimeNsec", Value: strconv.FormatInt(inode.MtimeNsec, 10)}, } } @@ -63,14 +90,22 @@ err = unix.Fstat(int(fd.Fd()), &stat) if err != nil { return nil, err } - sec, nsec := stat.Ctim.Unix() - return &Inode{Size: fi.Size(), CtimeSec: sec, CtimeNsec: nsec}, nil + ctimeSec, ctimeNsec := stat.Ctim.Unix() + mtimeSec := fi.ModTime().Unix() + mtimeNsec := fi.ModTime().UnixNano() + return &Inode{ + Size: 
fi.Size(), + CtimeSec: ctimeSec, CtimeNsec: ctimeNsec, + MtimeSec: mtimeSec, MtimeNsec: mtimeNsec, + }, nil } func inodeFromRec(m map[string]string) (*Inode, error) { size := m["Size"] ctimeSec := m["CtimeSec"] ctimeNsec := m["CtimeNsec"] + mtimeSec := m["MtimeSec"] + mtimeNsec := m["MtimeNsec"] if size == "" { return nil, errors.New("Size is missing") } @@ -93,6 +128,19 @@ } inode.CtimeNsec, err = strconv.ParseInt(ctimeNsec, 10, 64) if err != nil { return nil, err + } + if mtimeSec != "" { + if mtimeNsec == "" { + return nil, errors.New("MtimeNsec is missing") + } + inode.MtimeSec, err = strconv.ParseInt(mtimeSec, 10, 64) + if err != nil { + return nil, err + } + inode.MtimeNsec, err = strconv.ParseInt(mtimeNsec, 10, 64) + if err != nil { + return nil, err + } } return &inode, nil } diff --git a/main.go b/main.go index 261d6faa289413004f7062e6fc621fcec7db37cb8ae6c9e162db8f48eec9fab6..248b940ccecd9b48d665694190509c2ed1ff81dccbfaa4f28f7cdf5c378257e6 100644 --- a/main.go +++ b/main.go @@ -138,10 +138,6 @@ if err != nil { log.Fatalln(err) } - NoColor = os.Getenv(EnvNoColor) != "" - NoSync = os.Getenv(EnvNoSync) == "1" - InodeTrust = os.Getenv(EnvInodeNoTrust) == "" - TopDir = os.Getenv(EnvTopDir) if TopDir == "" { TopDir = "/" @@ -198,6 +194,19 @@ traced = true } else if flagTrace != nil { traced = *flagTrace } + NoColor = os.Getenv(EnvNoColor) != "" + NoSync = os.Getenv(EnvNoSync) == "1" + switch s := os.Getenv(EnvInodeTrust); s { + case "none": + InodeTrust = InodeTrustNone + case "", "ctime": + InodeTrust = InodeTrustCtime + case "mtime": + InodeTrust = InodeTrustMtime + default: + log.Fatalln("unknown", EnvInodeTrust, "value") + } + tracef(CDebug, "inode-trust: %s", InodeTrust) // Those are internal envs FdOODTgts, err = ioutil.TempFile("", "ood-tgts") diff --git a/makedist.sh b/makedist.sh index 25643d6814411fe1b50c1547a7509168a1f6860242c84a91ffa74fb680c0aed7..1d2f469c28f680a29aa26bd373b9864af5d58de574fcb3ca352b26d352489524 100755 --- a/makedist.sh +++ 
b/makedist.sh @@ -8,6 +8,7 @@ git clone . $tmp/goredo-$release cd $tmp/goredo-$release git checkout v$release +go generate redo-ifchange VERSION diff --git a/ood.go b/ood.go index 55519056c218d922ce569373d3ab2f9b6c2af7e04d02faaf4d9e4200e4193da6..7245859854c0ffb32c8430d29aaf5ed5b36218295b8f58b4f384bc2bfba5de1a 100644 --- a/ood.go +++ b/ood.go @@ -171,7 +171,7 @@ tracef(CDebug, "ood: %s%s -> %s: size differs", indent, tgtOrig, dep) ood = true goto Done } - if InodeTrust && inode.Equals(theirInode) { + if InodeTrust != InodeTrustNone && inode.Equals(theirInode) { tracef(CDebug, "ood: %s%s -> %s: same inode", indent, tgtOrig, dep) } else { tracef(CDebug, "ood: %s%s -> %s: inode differs", indent, tgtOrig, dep) diff --git a/usage.go b/usage.go index 96768264651756bd1e240d303f4d1abde6440a501417a012968a49d9e545f4c5..a72c75f75810fd4ddab7f05b5de7db4270b1f8c9f258900eb9ac37a5778dae30 100644 --- a/usage.go +++ b/usage.go @@ -24,7 +24,7 @@ "os" ) const ( - Version = "1.20.0" + Version = "1.21.0" Warranty = `Copyright (C) 2020-2021 Sergey Matveev This program is free software: you can redistribute it and/or modify @@ -123,8 +123,10 @@ (it can contain .redo/top as an alternative)`) if cmd == CmdNameRedo || cmd == CmdNameRedoIfchange { fmt.Fprintln(os.Stderr, ` REDO_NO_SYNC -- disable files/directories explicit filesystem syncing - REDO_INODE_NO_TRUST -- do not trust inode information (except for size) - and always check file's hash + REDO_INODE_TRUST -- {none,ctime,mtime}, either do not trust inode + information at all (always check size and hash), or + trust its ctime (the default one), or be satisfied + with its mtime REDO_MAKE -- bmake/gmake/none(default) jobserver protocol compatibility`) } }