bench/bench.sh

   1 #!/hint/bash
   2
   3 # Copyright © Tavian Barnes <tavianator@tavianator.com>
   4 # SPDX-License-Identifier: 0BSD
   5
   6 declare -gA URLS=(
   7     [chromium]="https://chromium.googlesource.com/chromium/src.git"
   8     [linux]="https://github.com/torvalds/linux.git"
   9     [rust]="https://github.com/rust-lang/rust.git"
  10 )
  11
  12 declare -gA TAGS=(
  13     [chromium]=119.0.6036.2
  14     [linux]=v6.5
  15     [rust]=1.72.1
  16 )
  17
  18 COMPLETE_DEFAULT=(linux rust chromium)
  19 EARLY_QUIT_DEFAULT=(chromium)
  20 PRINT_DEFAULT=(linux)
  21 STRATEGIES_DEFAULT=(rust)
  22 JOBS_DEFAULT=(rust)
  23
  24 usage() {
  25     printf 'Usage: tailfin run %s [--default]\n' "${BASH_SOURCE[0]}"
  26     printf '           [--complete] [--early-quit] [--print] [--strategies]\n'
  27     printf '           [--build=...] [--bfs] [--find] [--fd]\n'
  28     printf '           [--no-clean] [--help]\n\n'
  29
  30     printf '  --default\n'
  31     printf '      Run the default set of benchmarks\n\n'
  32
  33     printf '  --complete[=CORPUS]\n'
  34     printf '      Complete traversal benchmark.  \n'
  35     printf '      Default corpus is --complete="%s"\n\n' "${COMPLETE_DEFAULT[*]}"
  36
  37     printf '  --early-quit[=CORPUS]\n'
  38     printf '      Early quitting benchmark.  \n'
  39     printf '      Default corpus is --early-quit=%s\n\n' "${EARLY_QUIT_DEFAULT[*]}"
  40
  41     printf '  --print[=CORPUS]\n'
  42     printf '      Path printing benchmark.  \n'
  43     printf '      Default corpus is --print=%s\n\n' "${PRINT_DEFAULT[*]}"
  44
  45     printf '  --strategies[=CORPUS]\n'
  46     printf '      Search strategy benchmark.\n'
  47     printf '      Default corpus is --strategies=%s\n\n' "${STRATEGIES_DEFAULT[*]}"
  48
  49     printf '  --jobs[=CORPUS]\n'
  50     printf '      Parallelism benchmark.\n'
  51     printf '      Default corpus is --jobs=%s\n\n' "${JOBS_DEFAULT[*]}"
  52
  53     printf '  --build=COMMIT\n'
  54     printf '      Build this bfs commit and benchmark it.  Specify multiple times to\n'
  55     printf '      compare, e.g. --build=3.0.1 --build=3.0.2\n\n'
  56
  57     printf '  --bfs[=COMMAND]\n'
  58     printf '      Benchmark an existing build of bfs\n\n'
  59
  60     printf '  --find[=COMMAND]\n'
  61     printf '      Compare against find\n\n'
  62
  63     printf '  --fd[=COMMAND]\n'
  64     printf '      Compare against fd\n\n'
  65
  66     printf '  --no-clean\n'
  67     printf '      Use any existing corpora as-is\n\n'
  68
  69     printf '  --help\n'
  70     printf '      This message\n\n'
  71 }
  72
  73 # Hack to export an array
  74 export_array() {
  75     local str=$(declare -p "$1" | sed 's/ -a / -ga /')
  76     unset "$1"
  77     export "$1=$str"
  78 }
  79
  80 # Hack to import an array
  81 import_array() {
  82     local cmd="${!1}"
  83     unset "$1"
  84     eval "$cmd"
  85 }
  86
  87 # Set up the benchmarks
  88 setup() {
  89     ROOT=$(realpath -- "$(dirname -- "${BASH_SOURCE[0]}")/..")
  90     if ! [ "$PWD" -ef "$ROOT" ]; then
  91         printf 'error: Please run this script from %s\n\n' "$ROOT" >&2
  92         usage >&2
  93         exit $EX_USAGE
  94     fi
  95
  96     nproc=$(nproc)
  97
  98     # Options
  99
 100     CLEAN=1
 101
 102     BUILD=()
 103     BFS=()
 104     FIND=()
 105     FD=()
 106
 107     COMPLETE=()
 108     EARLY_QUIT=()
 109     PRINT=()
 110     STRATEGIES=()
 111     JOBS=()
 112
 113     for arg; do
 114         case "$arg" in
 115             # Flags
 116             --no-clean)
 117                 CLEAN=0
 118                 ;;
 119             # bfs commits/tags to benchmark
 120             --build=*)
 121                 BUILD+=("${arg#*=}")
 122                 BFS+=("bfs-${arg#*=}")
 123                 ;;
 124             # Utilities to benchmark against
 125             --bfs)
 126                 BFS+=(bfs)
 127                 ;;
 128             --bfs=*)
 129                 BFS+=("${arg#*=}")
 130                 ;;
 131             --find)
 132                 FIND+=(find)
 133                 ;;
 134             --find=*)
 135                 FIND+=("${arg#*=}")
 136                 ;;
 137             --fd)
 138                 FD+=(fd)
 139                 ;;
 140             --fd=*)
 141                 FD+=("${arg#*=}")
 142                 ;;
 143             # Benchmark groups
 144             --complete)
 145                 COMPLETE=("${COMPLETE_DEFAULT[@]}")
 146                 ;;
 147             --complete=*)
 148                 read -ra COMPLETE <<<"${arg#*=}"
 149                 ;;
 150             --early-quit)
 151                 EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
 152                 ;;
 153             --early-quit=*)
 154                 read -ra EARLY_QUIT <<<"${arg#*=}"
 155                 ;;
 156             --print)
 157                 PRINT=("${PRINT_DEFAULT[@]}")
 158                 ;;
 159             --print=*)
 160                 read -ra PRINT <<<"${arg#*=}"
 161                 ;;
 162             --strategies)
 163                 STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
 164                 ;;
 165             --strategies=*)
 166                 read -ra STRATEGIES <<<"${arg#*=}"
 167                 ;;
 168             --jobs)
 169                 JOBS=("${JOBS_DEFAULT[@]}")
 170                 ;;
 171             --jobs=*)
 172                 read -ra JOBS <<<"${arg#*=}"
 173                 ;;
 174             --default)
 175                 COMPLETE=("${COMPLETE_DEFAULT[@]}")
 176                 EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
 177                 PRINT=("${PRINT_DEFAULT[@]}")
 178                 STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
 179                 JOBS=("${JOBS_DEFAULT[@]}")
 180                 ;;
 181             --help)
 182                 usage
 183                 exit
 184                 ;;
 185             *)
 186                 printf 'error: Unknown option %q\n\n' "$arg" >&2
 187                 usage >&2
 188                 exit $EX_USAGE
 189                 ;;
 190         esac
 191     done
 192
 193     if ((UID == 0)); then
 194         max-freq
 195     fi
 196
 197     echo "Building bfs ..."
 198     as-user make -s -j"$nproc" release all
 199
 200     as-user mkdir -p bench/corpus
 201
 202     declare -A cloned=()
 203     for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}"; do
 204         if ((cloned["$corpus"])); then
 205             continue
 206         fi
 207         cloned["$corpus"]=1
 208
 209         dir="bench/corpus/$corpus"
 210         if ((CLEAN)) || ! [ -e "$dir" ]; then
 211             as-user ./bench/clone-tree.sh "${URLS[$corpus]}" "${TAGS[$corpus]}" "$dir"{,.git}
 212         fi
 213     done
 214
 215     if ((${#BUILD[@]} > 0)); then
 216         echo "Creating bfs worktree ..."
 217
 218         worktree="bench/worktree"
 219         as-user git worktree add -qd "$worktree"
 220         defer as-user git worktree remove "$worktree"
 221
 222         bin="$(realpath -- "$SETUP_DIR")/bin"
 223         as-user mkdir "$bin"
 224
 225         for commit in "${BUILD[@]}"; do
 226             (
 227                 echo "Building bfs $commit ..."
 228                 cd "$worktree"
 229                 as-user git checkout -qd "$commit" --
 230                 as-user make -s -j"$nproc" release
 231                 if [ -e ./bin/bfs ]; then
 232                     as-user cp ./bin/bfs "$bin/bfs-$commit"
 233                 else
 234                     as-user cp ./bfs "$bin/bfs-$commit"
 235                 fi
 236                 as-user make -s clean
 237             )
 238         done
 239
 240         # $SETUP_DIR contains `:` so it won't work in $PATH
 241         # Work around this with a symlink
 242         tmp=$(as-user mktemp)
 243         as-user ln -sf "$bin" "$tmp"
 244         defer rm "$tmp"
 245         export PATH="$tmp:$PATH"
 246     fi
 247
 248     export_array BFS
 249     export_array FIND
 250     export_array FD
 251
 252     export_array COMPLETE
 253     export_array EARLY_QUIT
 254     export_array PRINT
 255     export_array STRATEGIES
 256     export_array JOBS
 257
 258     if ((UID == 0)); then
 259         turbo-off
 260     fi
 261
 262     sync
 263 }
 264
 265 # Runs hyperfine and saves the output
 266 do-hyperfine() {
 267     local tmp_md="$BENCH_DIR/.bench.md"
 268     local md="$BENCH_DIR/bench.md"
 269     local tmp_json="$BENCH_DIR/.bench.json"
 270     local json="$BENCH_DIR/bench.json"
 271
 272     if (($# == 0)); then
 273         printf 'Nothing to do\n\n' | tee -a "$md"
 274         return 1
 275     fi
 276
 277     hyperfine -w2 -M20 --export-markdown="$tmp_md" --export-json="$tmp_json" "$@" &>/dev/tty
 278     cat "$tmp_md" >>"$md"
 279     cat "$tmp_json" >>"$json"
 280     rm "$tmp_md" "$tmp_json"
 281
 282     printf '\n' | tee -a "$md"
 283 }
 284
 285 # Print the header for a benchmark group
 286 group() {
 287     printf "## $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md"
 288 }
 289
 290 # Print the header for a benchmark subgroup
 291 subgroup() {
 292     printf "### $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md"
 293 }
 294
 295 # Print the header for a benchmark sub-subgroup
 296 subsubgroup() {
 297     printf "#### $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md"
 298 }
 299
 300 # Benchmark the complete traversal of a directory tree
 301 # (without printing anything)
 302 bench-complete-corpus() {
 303     total=$(./bin/bfs "$2" -printf '.' | wc -c)
 304
 305     subgroup "%s (%'d files)" "$1" "$total"
 306
 307     cmds=()
 308     for bfs in "${BFS[@]}"; do
 309         cmds+=("$bfs $2 -false")
 310     done
 311
 312     for find in "${FIND[@]}"; do
 313         cmds+=("$find $2 -false")
 314     done
 315
 316     for fd in "${FD[@]}"; do
 317         cmds+=("$fd -u '^$' $2")
 318     done
 319
 320     do-hyperfine "${cmds[@]}"
 321 }
 322
 323 # All complete traversal benchmarks
 324 bench-complete() {
 325     if (($#)); then
 326         group "Complete traversal"
 327
 328         for corpus; do
 329             bench-complete-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
 330         done
 331     fi
 332 }
 333
 334 # Benchmark quiting as soon as a file is seen
 335 bench-early-quit-corpus() {
 336     dir="$2"
 337     max_depth=$(./bin/bfs "$dir" -printf '%d\n' | sort -rn | head -n1)
 338
 339     subgroup '%s (depth %d)' "$1" "$max_depth"
 340
 341     # Save the list of unique filenames, along with their depth
 342     UNIQ="$BENCH_DIR/uniq"
 343     ./bin/bfs "$dir" -printf '%d %f\n' | sort -k2 | uniq -uf1 >"$UNIQ"
 344
 345     for ((i = 2; i <= max_depth; i *= 2)); do
 346         subsubgroup 'Depth %d' "$i"
 347
 348         # Sample random uniquely-named files at depth $i
 349         export FILES="$BENCH_DIR/uniq-$i"
 350         sed -n "s/^$i //p" "$UNIQ" | shuf -n20 >"$FILES"
 351         if ! [ -s "$FILES" ]; then
 352             continue
 353         fi
 354
 355         cmds=()
 356         for bfs in "${BFS[@]}"; do
 357             cmds+=("$bfs $dir -name \$(shuf -n1 \$FILES) -print -quit")
 358         done
 359
 360         for find in "${FIND[@]}"; do
 361             cmds+=("$find $dir -name \$(shuf -n1 \$FILES) -print -quit")
 362         done
 363
 364         for fd in "${FD[@]}"; do
 365             cmds+=("$fd -usg1 \$(shuf -n1 \$FILES) $dir")
 366         done
 367
 368         do-hyperfine "${cmds[@]}"
 369     done
 370 }
 371
 372 # All early-quitting benchmarks
 373 bench-early-quit() {
 374     if (($#)); then
 375         group "Early termination"
 376
 377         for corpus; do
 378             bench-early-quit-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
 379         done
 380     fi
 381 }
 382
 383 # Benchmark printing paths without colors
 384 bench-print-nocolor() {
 385     subsubgroup '%s' "$1"
 386
 387     cmds=()
 388     for bfs in "${BFS[@]}"; do
 389         cmds+=("$bfs $2")
 390     done
 391
 392     for find in "${FIND[@]}"; do
 393         cmds+=("$find $2")
 394     done
 395
 396     for fd in "${FD[@]}"; do
 397         cmds+=("$fd -u --search-path $2")
 398     done
 399
 400     do-hyperfine "${cmds[@]}"
 401 }
 402
 403 # Benchmark printing paths with colors
 404 bench-print-color() {
 405     subsubgroup '%s' "$1"
 406
 407     cmds=()
 408     for bfs in "${BFS[@]}"; do
 409         cmds+=("$bfs $2 -color")
 410     done
 411
 412     for fd in "${FD[@]}"; do
 413         cmds+=("$fd -u --search-path $2 --color=always")
 414     done
 415
 416     do-hyperfine "${cmds[@]}"
 417 }
 418
 419 # All printing benchmarks
 420 bench-print() {
 421     if (($#)); then
 422         group "Printing paths"
 423
 424         subgroup "Without colors"
 425         for corpus; do
 426             bench-print-nocolor "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
 427         done
 428
 429         subgroup "With colors"
 430         for corpus; do
 431             bench-print-color "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
 432         done
 433     fi
 434 }
 435
 436 # Benchmark search strategies
 437 bench-strategies-corpus() {
 438     subgroup '%s' "$1"
 439
 440     if ((${#BFS[@]} == 1)); then
 441         cmds=("$BFS -S "{bfs,dfs,ids,eds}" $2 -false")
 442         do-hyperfine "${cmds[@]}"
 443     else
 444         for S in bfs dfs ids eds; do
 445             subsubgroup '`-S %s`' "$S"
 446
 447             cmds=()
 448             for bfs in "${BFS[@]}"; do
 449                 cmds+=("$bfs -S $S $2 -false")
 450             done
 451             do-hyperfine "${cmds[@]}"
 452         done
 453     fi
 454 }
 455
 456 # All search strategy benchmarks
 457 bench-strategies() {
 458     if (($#)); then
 459         group "Search strategies"
 460
 461         for corpus; do
 462             bench-strategies-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
 463         done
 464     fi
 465 }
 466
 467 # Benchmark parallelism
 468 bench-jobs-corpus() {
 469     subgroup '%s' "$1"
 470
 471     if ((${#BFS[@]} + ${#FD[@]} == 1)); then
 472         cmds=()
 473         for bfs in "${BFS[@]}"; do
 474             if "$bfs" -j1 -quit &>/dev/null; then
 475                 cmds+=("$bfs -j"{1,2,3,4,6,8,12,16}" $2 -false")
 476             else
 477                 cmds+=("$bfs $2 -false")
 478             fi
 479         done
 480
 481         for fd in "${FD[@]}"; do
 482             cmds+=("$fd -j"{1,2,3,4,6,8,12,16}" -u '^$' $2")
 483         done
 484
 485         do-hyperfine "${cmds[@]}"
 486     else
 487         for j in 1 2 3 4 6 8 12 16; do
 488             subsubgroup '`-j%d`' $j
 489
 490             cmds=()
 491             for bfs in "${BFS[@]}"; do
 492                 if "$bfs" -j1 -quit &>/dev/null; then
 493                     cmds+=("$bfs -j$j $2 -false")
 494                 elif ((j == 1)); then
 495                     cmds+=("$bfs $2 -false")
 496                 fi
 497             done
 498
 499             for fd in "${FD[@]}"; do
 500                 cmds+=("$fd -j$j -u '^$' $2")
 501             done
 502
 503             if ((${#cmds[@]})); then
 504                 do-hyperfine "${cmds[@]}"
 505             fi
 506         done
 507     fi
 508 }
 509
 510 # All parallelism benchmarks
 511 bench-jobs() {
 512     if (($#)); then
 513         group "Parallelism"
 514
 515         for corpus; do
 516             bench-jobs-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
 517         done
 518     fi
 519 }
 520
 521 # Print benchmarked versions
 522 bench-versions() {
 523     subgroup "Versions"
 524
 525     local md="$BENCH_DIR/bench.md"
 526
 527     printf '```console\n' >>"$md"
 528
 529     {
 530         for bfs in "${BFS[@]}"; do
 531             printf '$ %s --version | head -n1\n' "$bfs"
 532             "$bfs" --version | head -n1
 533         done
 534
 535         for find in "${FIND[@]}"; do
 536             printf '$ %s --version | head -n1\n' "$find"
 537             "$find" --version | head -n1
 538         done
 539
 540         for fd in "${FD[@]}"; do
 541             printf '$ %s --version\n' "$fd"
 542             "$fd" --version
 543         done
 544     } | tee -a "$md"
 545
 546     printf '```' >>"$md"
 547 }
 548
 549 # Print benchmark details
 550 bench-details() {
 551     group "Details"
 552
 553     bench-versions
 554 }
 555
 556 # Run all the benchmarks
 557 bench() {
 558     import_array BFS
 559     import_array FIND
 560     import_array FD
 561
 562     import_array COMPLETE
 563     import_array EARLY_QUIT
 564     import_array PRINT
 565     import_array STRATEGIES
 566     import_array JOBS
 567
 568     bench-complete "${COMPLETE[@]}"
 569     bench-early-quit "${EARLY_QUIT[@]}"
 570     bench-print "${PRINT[@]}"
 571     bench-strategies "${STRATEGIES[@]}"
 572     bench-jobs "${JOBS[@]}"
 573     bench-details
 574 }