gocheese.go | 181 ++++++++++++++++++++++------------------------------- gocheese.texi | 48 +++++++++++++++++++++++++++++------------------- integrity.go | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++ diff --git a/gocheese.go b/gocheese.go index a419065075ea37576ad326e75ca78939875460fedfdf1141a63ca7a17c77b0d9..e11ac53e47143fe5ccc6483d3241c61be2fc42b1671d6e73418424ed400800e7 100644 --- a/gocheese.go +++ b/gocheese.go @@ -45,6 +45,7 @@ "strings" "syscall" "time" + "golang.org/x/crypto/blake2b" "golang.org/x/net/netutil" ) @@ -60,7 +61,6 @@ HTMLEnd = " \n\n" HTMLElement = " %s
\n" InternalFlag = ".internal" GPGSigExt = ".asc" - GPGSigAttr = " data-gpg-sig=true" Warranty = `This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -76,8 +76,19 @@ along with this program. If not, see .` ) var ( - pkgPyPI = regexp.MustCompile(`^.*]*>(.+)
.*$`) - Version string = "UNKNOWN" + pkgPyPI = regexp.MustCompile(`^.*]*>(.+)
.*$`) + normalizationRe = regexp.MustCompilePOSIX("[-_.]+") + + HashAlgoSHA256 = "sha256" + HashAlgoBLAKE2b256 = "blake2_256" + HashAlgoSHA512 = "sha512" + HashAlgoMD5 = "md5" + knownHashAlgos []string = []string{ + HashAlgoSHA256, + HashAlgoBLAKE2b256, + HashAlgoSHA512, + HashAlgoMD5, + } root = flag.String("root", "./packages", "Path to packages directory") bind = flag.String("bind", "[::]:8080", "Address to bind to") @@ -94,10 +105,9 @@ maxClients = flag.Int("maxclients", 128, "Maximal amount of simultaneous clients") version = flag.Bool("version", false, "Print version information") warranty = flag.Bool("warranty", false, "Print warranty information") - killed bool - - pypiURLParsed *url.URL - normalizationRe *regexp.Regexp = regexp.MustCompilePOSIX("[-_.]+") + Version string = "UNKNOWN" + killed bool + pypiURLParsed *url.URL ) func mkdirForPkg(w http.ResponseWriter, r *http.Request, dir string) bool { @@ -110,6 +120,14 @@ } log.Println(r.RemoteAddr, "mkdir", dir) } return true +} + +func blake2b256New() hash.Hash { + h, err := blake2b.New256(nil) + if err != nil { + panic(err) + } + return h } func refreshDir( @@ -137,20 +155,15 @@ if !mkdirForPkg(w, r, dir) { return false } dirPath := filepath.Join(*root, dir) - var submatches []string - var uri string - var filename string - var path string - var pkgURL *url.URL - var digest []byte for _, lineRaw := range bytes.Split(body, []byte("\n")) { - submatches = pkgPyPI.FindStringSubmatch(string(lineRaw)) + submatches := pkgPyPI.FindStringSubmatch(string(lineRaw)) if len(submatches) == 0 { continue } - uri = submatches[1] - filename = submatches[2] - if pkgURL, err = url.Parse(uri); err != nil { + uri := submatches[1] + filename := submatches[2] + pkgURL, err := url.Parse(uri) + if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) return false } @@ -169,31 +182,31 @@ log.Println(r.RemoteAddr, "pypi", filename, "invalid digest provided") http.Error(w, "invalid digest provided", http.StatusBadGateway) 
return false } - digest, err = hex.DecodeString(digestInfo[1]) + digest, err := hex.DecodeString(digestInfo[1]) if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) return false } + hashAlgo := digestInfo[0] var hasherNew func() hash.Hash - var hashExt string var hashSize int - switch digestInfo[0] { - case "md5": - hashExt = ".md5" + switch hashAlgo { + case HashAlgoMD5: hasherNew = md5.New hashSize = md5.Size - case "sha256": - hashExt = ".sha256" + case HashAlgoSHA256: hasherNew = sha256.New hashSize = sha256.Size - case "sha512": - hashExt = ".sha512" + case HashAlgoSHA512: hasherNew = sha512.New hashSize = sha512.Size + case HashAlgoBLAKE2b256: + hasherNew = blake2b256New + hashSize = blake2b.Size256 default: log.Println( r.RemoteAddr, "pypi", filename, - "unknown digest algorithm", digestInfo[0], + "unknown digest algorithm", hashAlgo, ) http.Error(w, "unknown digest algorithm", http.StatusBadGateway) return false @@ -210,7 +223,8 @@ uri = pypiURLParsed.ResolveReference(pkgURL).String() } else { uri = pkgURL.String() } - path = filepath.Join(dirPath, filename) + + path := filepath.Join(dirPath, filename) if filename == filenameGet { if killed { // Skip heavy remote call, when shutting down @@ -226,7 +240,7 @@ return false } defer resp.Body.Close() hasher := hasherNew() - hasherOur := sha256.New() + hasherSHA256 := sha256.New() dst, err := TempFile(dirPath) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) @@ -234,8 +248,8 @@ return false } dstBuf := bufio.NewWriter(dst) wrs := []io.Writer{hasher, dstBuf} - if hashExt != ".sha256" { - wrs = append(wrs, hasherOur) + if hashAlgo != HashAlgoSHA256 { + wrs = append(wrs, hasherSHA256) } wr := io.MultiWriter(wrs...) 
if _, err = io.Copy(wr, resp.Body); err != nil { @@ -275,9 +289,12 @@ if err = DirSync(dirPath); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false } - if hashExt != ".sha256" { - hashExt = ".sha256" - digest = hasherOur.Sum(nil) + if hashAlgo != HashAlgoSHA256 { + hashAlgo = HashAlgoSHA256 + digest = hasherSHA256.Sum(nil) + for _, algo := range knownHashAlgos[1:] { + os.Remove(path + "." + algo) + } } } if filename == filenameGet || gpgUpdate { @@ -297,6 +314,10 @@ resp.Body.Close() if err != nil { goto GPGSigSkip } + if !bytes.HasPrefix(sig, []byte("-----BEGIN PGP SIGNATURE-----")) { + log.Println(r.RemoteAddr, "pypi non PGP signature", filename) + goto GPGSigSkip + } if err = WriteFileSync(dirPath, path+GPGSigExt, sig); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return false @@ -304,7 +325,7 @@ } log.Println(r.RemoteAddr, "pypi downloaded signature", filename) } GPGSigSkip: - path = path + hashExt + path = path + "." + hashAlgo _, err = os.Stat(path) if err == nil { continue @@ -363,54 +384,44 @@ if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } - var result bytes.Buffer - result.WriteString(fmt.Sprintf(HTMLBegin, dir)) - var digest []byte - var gpgSigAttr string - var fnClean string files := make(map[string]struct{}, len(fis)/2) for _, fi := range fis { files[fi.Name()] = struct{}{} } - for _, algoExt := range []string{".sha256", ".sha512", ".md5"} { + var result bytes.Buffer + result.WriteString(fmt.Sprintf(HTMLBegin, dir)) + for _, algo := range knownHashAlgos { for fn, _ := range files { if killed { // Skip expensive I/O when shutting down http.Error(w, "shutting down", http.StatusInternalServerError) return } - if !strings.HasSuffix(fn, algoExt) { + if !strings.HasSuffix(fn, "."+algo) { continue } - digest, err = ioutil.ReadFile(filepath.Join(dirPath, fn)) + delete(files, fn) + digest, err := ioutil.ReadFile(filepath.Join(dirPath, fn)) if err != nil { 
http.Error(w, err.Error(), http.StatusInternalServerError) return } - fnClean = strings.TrimSuffix(fn, algoExt) - if _, err = os.Stat(filepath.Join(dirPath, fnClean+GPGSigExt)); os.IsNotExist(err) { - gpgSigAttr = "" - } else { - gpgSigAttr = GPGSigAttr + fnClean := strings.TrimSuffix(fn, "."+algo) + delete(files, fnClean) + gpgSigAttr := "" + if _, err = os.Stat(filepath.Join(dirPath, fnClean+GPGSigExt)); err == nil { + gpgSigAttr = " data-gpg-sig=true" + delete(files, fnClean+GPGSigExt) } result.WriteString(fmt.Sprintf( HTMLElement, strings.Join([]string{ *refreshURLPath, dir, "/", fnClean, - "#", algoExt[1:], "=", hex.EncodeToString(digest), + "#", algo, "=", hex.EncodeToString(digest), }, ""), gpgSigAttr, fnClean, )) - for _, n := range []string{ - fnClean, - fnClean + GPGSigExt, - fnClean + ".sha256", - fnClean + ".sha512", - fnClean + ".md5", - } { - delete(files, n) - } } } result.WriteString(HTMLEnd) @@ -454,8 +465,8 @@ if !exists || len(pkgNames) != 1 { http.Error(w, "single name is expected in request", http.StatusBadRequest) return } - dir := normalizationRe.ReplaceAllString(pkgNames[0], "-") - dirPath := filepath.Join(*root, dir) + pkgName := normalizationRe.ReplaceAllString(pkgNames[0], "-") + dirPath := filepath.Join(*root, pkgName) var digestExpected []byte if digestExpectedHex, exists := r.MultipartForm.Value["sha256_digest"]; exists { digestExpected, err = hex.DecodeString(digestExpectedHex[0]) @@ -468,7 +479,7 @@ gpgSigsExpected := make(map[string]struct{}) // Checking is it internal package if _, err = os.Stat(filepath.Join(dirPath, InternalFlag)); err != nil { - log.Println(r.RemoteAddr, "non-internal package", dir) + log.Println(r.RemoteAddr, "non-internal package", pkgName) http.Error(w, "unknown internal package", http.StatusUnauthorized) return } @@ -483,7 +494,7 @@ log.Println(r.RemoteAddr, "already exists", filename) http.Error(w, "already exists", http.StatusBadRequest) return } - if !mkdirForPkg(w, r, dir) { + if !mkdirForPkg(w, r, 
pkgName) { return } src, err := file.Open() @@ -538,7 +549,7 @@ if err = DirSync(dirPath); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } - if err = WriteFileSync(dirPath, path+".sha256", digest); err != nil { + if err = WriteFileSync(dirPath, path+"."+HashAlgoSHA256, digest); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -613,50 +624,6 @@ serveUpload(w, r) default: http.Error(w, "unknown action", http.StatusBadRequest) } -} - -func goodIntegrity() bool { - dirs, err := ioutil.ReadDir(*root) - if err != nil { - log.Fatal(err) - } - hasher := sha256.New() - digest := make([]byte, sha256.Size) - isGood := true - var data []byte - var pkgName string - for _, dir := range dirs { - files, err := ioutil.ReadDir(filepath.Join(*root, dir.Name())) - if err != nil { - log.Fatal(err) - } - for _, file := range files { - if !strings.HasSuffix(file.Name(), ".sha256") { - continue - } - pkgName = strings.TrimSuffix(file.Name(), ".sha256") - data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), pkgName)) - if err != nil { - if os.IsNotExist(err) { - continue - } - log.Fatal(err) - } - hasher.Write(data) - data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), file.Name())) - if err != nil { - log.Fatal(err) - } - if bytes.Compare(hasher.Sum(digest[:0]), data) == 0 { - fmt.Println(pkgName, "GOOD") - } else { - isGood = false - fmt.Println(pkgName, "BAD") - } - hasher.Reset() - } - } - return isGood } func main() { diff --git a/gocheese.texi b/gocheese.texi index 5dd0d812d58efa81ff74a6949613a8c62011836f9d718bd60896411d60f1c8e1..8b4d55be6ca145d367d2f1d2e0107e333d65dd45cae1696a75162ac6664e3570 100644 --- a/gocheese.texi +++ b/gocheese.texi @@ -25,13 +25,14 @@ @itemize @item proxying and caching of missing packages, including GPG signatures @item @url{https://pythonwheels.com/, Wheel} uploading support +@item integrity check of proxied packages: MD5, SHA256, SHA512, BLAKE2b-256 +@item SHA256 checksums 
for stored packages +@item verifying of SHA256 checksum for uploaded packages +@item storing of uploaded GPG signatures +@item no YAML configuration, just command-line arguments +@item no package overwriting ability (as PyPI does too) @item atomic packages store on filesystem -@item SHA256-checksummed packages: storing checksums, giving them back, - verifying stored files integrity, verifying checksum of uploaded - packaged @item graceful HTTP-server shutdown -@item no YAML configuration, just command-line arguments -@item no package overwriting ability (as PyPI does too) @end itemize Also it contains @file{pyshop2packages.sh} migration script for @@ -91,7 +92,7 @@ file is checked against it. Pay attention that you have to manually create corresponding private package directory! You are not allowed to upload anything explicitly -flagged as private. +flagged as internal package. @node Passwords @unnumbered Password authentication @@ -184,31 +185,40 @@ @verbatim root +-- public-package | +- public-package-0.1.tar.gz.md5 - | +- public-package-0.1.1.tar.gz.sha256 + | +- public-package-0.1.tar.gz.blake2_256 + | +- public-package-0.1.1.tar.gz.blake2_256 | +- public-package-0.2.tar.gz | +- public-package-0.2.tar.gz.asc | +- public-package-0.2.tar.gz.sha256 +-- private-package | +- .internal | +- private-package-0.1.tar.gz + | +- private-package-0.1.tar.gz.asc | +- private-package-0.1.tar.gz.sha256 |... @end verbatim -Each directory is a package name. When you try to list non existent -directory contents (you are downloading package you have not seen -before), then GoCheese will download information about package's -versions with checksums and write them in corresponding @file{.sha256} -files. However no package package tarball is downloaded. +Each directory is a normalized package name. 
When you try to list non
+existent directory contents (you are downloading package you have not
+seen before), then GoCheese will download information about package's
+versions with checksums and write them in corresponding
+@file{.sha256}, @file{.blake2_256}, @file{.sha512}, @file{.md5} files.
+However no package tarball is downloaded.
 
 When you request for particular package version, then its tarball is
-downloaded and verified against the checksum. For example in the root
-directory above we have downloaded only @file{public-package-0.2}.
-If upstream has corresponding @file{.asc} file, then it also will be
-downloaded.
+downloaded and verified against the stored checksum. But SHA256 is
+forced to be stored and used later.
 
-Private packages contain @file{.internal} file, indicating that it must
-not be asked in PyPI if required version is missing. You have to create
-it manually.
+For example @file{public-package} has @code{0.1} version, downloaded a
+long time ago with MD5 checksum. @code{0.1.1} version is downloaded more
+recently with BLAKE2b-256 checksum, also storing that checksum for
+@code{0.1}. @code{0.2} version is a downloaded tarball, having a forced
+SHA256 recalculated checksum. Also upstream has a corresponding
+@file{.asc} signature file.
 
+@file{private-package} is a private package, because it contains
+@file{.internal} file. It can be uploaded and queries to it are not
+proxied to upstream PyPI. You have to create it manually. If you upload
+a GPG signature, then it will also be stored.
 
@bye diff --git a/integrity.go b/integrity.go new file mode 100644 index 0000000000000000000000000000000000000000..ca8e6e7733657c7651da811ae7c63023246567243dc4f111dd9c2daf4ef492b4 --- /dev/null +++ b/integrity.go @@ -0,0 +1,73 @@ +/* +GoCheese -- Python private package repository and caching proxy +Copyright (C) 2019 Sergey Matveev + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +package main + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io/ioutil" + "log" + "os" + "path/filepath" + "strings" +) + +func goodIntegrity() bool { + dirs, err := ioutil.ReadDir(*root) + if err != nil { + log.Fatal(err) + } + hasher := sha256.New() + digest := make([]byte, sha256.Size) + isGood := true + var data []byte + var pkgName string + for _, dir := range dirs { + files, err := ioutil.ReadDir(filepath.Join(*root, dir.Name())) + if err != nil { + log.Fatal(err) + } + for _, file := range files { + if !strings.HasSuffix(file.Name(), "."+HashAlgoSHA256) { + continue + } + pkgName = strings.TrimSuffix(file.Name(), "."+HashAlgoSHA256) + data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), pkgName)) + if err != nil { + if os.IsNotExist(err) { + continue + } + log.Fatal(err) + } + hasher.Write(data) + data, err = ioutil.ReadFile(filepath.Join(*root, dir.Name(), file.Name())) + if err != nil { + log.Fatal(err) + } + if bytes.Compare(hasher.Sum(digest[:0]), data) == 0 { + fmt.Println(pkgName, "GOOD") + } else { + isGood = false + fmt.Println(pkgName, "BAD") + } + hasher.Reset() + } 
+ } + return isGood +}