Sergey Matveev's repositories - bfs.git/commitdiff
bench: New script to clone a git repo without file contents
author: Tavian Barnes <tavianator@tavianator.com>
Thu, 28 Sep 2023 21:39:34 +0000 (17:39 -0400)
committer: Tavian Barnes <tavianator@tavianator.com>
Sat, 30 Sep 2023 13:28:51 +0000 (09:28 -0400)
bench/clone-tree.sh [new file with mode: 0755]

diff --git a/bench/clone-tree.sh b/bench/clone-tree.sh
new file mode 100755 (executable)
index 0000000..744b5f4
--- /dev/null
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+
+# Copyright © Tavian Barnes <tavianator@tavianator.com>
+# SPDX-License-Identifier: 0BSD
+
+# Creates a directory tree that matches a git repo, but with empty files.  E.g.
+#
+#     $ ./bench/clone-tree.sh "https://.../linux.git" v6.5 ./linux ./linux.git
+#
+# will create or update a shallow clone at ./linux.git, then create a directory
+# tree at ./linux with the same directory tree as the tag v6.5, except all files
+# will be empty.
+
+set -eu
+
+if (($# != 4)); then
+    printf 'Usage: %s https://url/of/repo.git <TAG> path/to/checkout path/to/repo.git\n' "$0" >&2
+    exit 1
+fi
+
+URL="$1"
+TAG="$2"
+DIR="$3"
+REPO="$4"
+
+# Locate the bfs and xtouch binaries relative to this script's directory
+BENCH=$(dirname -- "${BASH_SOURCE[0]}")
+BIN=$(realpath -- "$BENCH/../bin")
+BFS="$BIN/bfs"
+XTOUCH="$BIN/tests/xtouch"
+
+# NPROC is exported so that the recursive invocations of this script (one per
+# submodule) inherit it.
+if [ "${NPROC-}" ]; then
+    # Use fewer cores in recursive calls
+    NPROC=$(((NPROC + 1) / 2))
+else
+    # Assign separately from `export`: `export NPROC=$(nproc)` would return
+    # export's status, hiding a failed nproc from `set -e` and leaving NPROC
+    # empty (which makes JOBS evaluate to 0 below).
+    NPROC=$(nproc)
+fi
+export NPROC
+
+# Number of parallel jobs: NPROC, capped at 8
+JOBS=$((NPROC < 8 ? NPROC : 8))
+
+# Run git with the given arguments against the repository at $REPO
+do-git() { git -C "$REPO" "$@"; }
+
+if ! [ -e "$REPO" ]; then
+    mkdir -p -- "$REPO"
+    do-git init -q --bare
+fi
+
+has-ref() {
+    do-git rev-list --quiet -1 --missing=allow-promisor "$1" &>/dev/null
+}
+
+sparse-fetch() {
+    do-git -c fetch.negotiationAlgorithm=noop fetch -q --filter=blob:none --depth=1 --no-tags --no-write-fetch-head --no-auto-gc "$@"
+}
+
+if ! has-ref "$TAG"; then
+    printf 'Fetching %s ...\n' "$TAG" >&2
+    do-git config remote.origin.url "$URL"
+    if ((${#TAG} >= 40)); then
+        sparse-fetch origin "$TAG"
+    else
+        sparse-fetch origin tag "$TAG"
+    fi
+fi
+
+# Delete a tree in parallel
+clean() {
+    local d=5
+    "$BFS" -f "$1" -mindepth $d -maxdepth $d -type d -print0 \
+        | xargs -0r -n1 -P$JOBS -- "$BFS" -j1 -mindepth 1 -delete -f
+    "$BFS" -f "$1" -delete
+}
+
+if [ -e "$DIR" ]; then
+    printf 'Cleaning old directory tree %s ...\n' "$DIR" >&2
+    TMP=$(mktemp -dp "$(dirname -- "$DIR")")
+    mv -- "$DIR" "$TMP"
+    clean "$TMP" &
+fi
+
+# List gitlinks (submodule references) in the tree
+ls-gitlinks() {
+    do-git ls-tree -zr "$TAG" \
+        | sed -zn 's/.* commit //p'
+}
+
+# Get the submodule ID for a path
+submodule-for-path() {
+    do-git config --blob "$TAG:.gitmodules" \
+                  --name-only \
+                  --fixed-value \
+                  --get-regexp 'submodule\..**\.path' "$1" \
+        | sed -En 's/submodule\.(.*)\.path/\1/p'
+}
+
+# Get the URL for a submodule
+#
+# $1: the submodule ID (the <name> in submodule.<name>.url)
+#
+# Prints the value of submodule.$1.url from .gitmodules in the tree at $TAG.
+# If the URL starts with one of the prefixes listed below, nothing is printed
+# and the function returns 1.
+submodule-url() {
+    # - https://chrome-internal.googlesource.com/
+    #   - not publicly accessible
+    # - https://chromium.googlesource.com/external/github.com/WebKit/webkit.git
+    #   - is accessible, but the commit (59e9de61b7b3) isn't
+    # - https://android.googlesource.com/
+    #   - is accessible, but you need an account
+
+    # sed's `Q1` (a GNU extension) quits without printing and makes sed exit
+    # with status 1.  sed is the last command in the pipeline, so its status
+    # is what the function returns; git's own exit status is not checked here.
+    do-git config --blob "$TAG:.gitmodules" \
+                  --get "submodule.$1.url" \
+        | sed -E \
+              -e '\|^https://chrome-internal.googlesource.com/|Q1' \
+              -e '\|^https://chromium.googlesource.com/external/github.com/WebKit/webkit.git|Q1' \
+              -e '\|^https://android.googlesource.com/|Q1'
+}
+
+# Recursively checkout submodules
+while read -rd '' SUBREF SUBDIR; do
+    SUBNAME=$(submodule-for-path "$SUBDIR")
+    SUBURL=$(submodule-url "$SUBNAME") || continue
+
+    if (($(jobs -pr | wc -w) >= JOBS)); then
+        wait -n
+    fi
+    "$0" "$SUBURL" "$SUBREF" "$DIR/$SUBDIR" "$REPO/modules/$SUBNAME" &
+done < <(ls-gitlinks)
+
+# Touch files in parallel
+xtouch() (
+    cd "$DIR"
+    if ((JOBS > 1)); then
+        xargs -0r -n4096 -P$JOBS -- "$XTOUCH" -p --
+    else
+        xargs -0r -- "$XTOUCH" -p --
+    fi
+)
+
+# Check out files
+printf 'Checking out %s ...\n' "$DIR" >&2
+mkdir -p -- "$DIR"
+# For every blob record, strip everything up to the tab so only the path is
+# left, then hand the NUL-delimited paths to xtouch
+do-git ls-tree -r -z "$TAG" \
+    | sed -n -z -e 's/.* blob .*\t//p' \
+    | xtouch
+
+# Wait for cleaning/submodules
+wait