#!/usr/bin/env bash
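
# Benchmark a fixed set of GGUF models with llama-batched-bench and llama-bench,
# collecting all output in bench-models-results.txt. Pass --quick for a reduced
# parameter sweep.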

RESULTS="bench-models-results.txt"
: > "$RESULTS"

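# Full benchmark sweeps:
#   ARGS_BB (llama-batched-bench): -c context size, -npp prompt lengths,
#           -npl parallel sequence counts, -ntg generated tokens per sequence
#   ARGS_B  (llama-bench): -d context depths, -p prompt length, -n generated tokens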
ARGS_BB="-c 270336 -npp 512,4096,8192 -npl 1,2,4,8,16,32 -ntg 32"
ARGS_B="-d 0,4096,8192,16384,32768 -p 2048 -n 32"

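# Only --quick is recognized; any other argument is ignored.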
QUICK=0
while (( "$#" )); do
  case "$1" in
    --quick) QUICK=1; shift ;;
    *) shift ;;
  esac
done

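# Smaller sweeps for a quick smoke test.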
if (( QUICK )); then
  ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
  ARGS_B="-d 0 -p 2048 -n 32"
fi

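# run_model <HF repo> <GGUF file>
# Downloads the model from Hugging Face if needed (-hfr/-hff) and runs both
# benchmarks, teeing their output into $RESULTS.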
run_model() {
  local HFR=$1
  local HFF=$2

  printf "## %s\n" "${HFR}" | tee -a "$RESULTS"
  printf "\n" | tee -a "$RESULTS"
  printf "Model: https://huggingface.co/%s\n" "${HFR}" | tee -a "$RESULTS"
  printf "\n" | tee -a "$RESULTS"

  printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS"
  printf "\n" | tee -a "$RESULTS"

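  # Batched throughput sweep; ARGS_BB is intentionally unquoted so that it
  # word-splits into separate flags.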
  ./bin/llama-batched-bench \
    -hfr "${HFR}" -hff "${HFF}" \
    -m "${HFF}" -fa 1 -ub 2048 --no-mmap \
    ${ARGS_BB} | tee -a "$RESULTS"

  printf "\n" | tee -a "$RESULTS"

  printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS"
  printf "\n" | tee -a "$RESULTS"

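  # Single-stream prompt-processing / generation speed at several context depths.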
  ./bin/llama-bench \
    -m "${HFF}" -fa 1 -ub 2048 -mmp 0 \
    ${ARGS_B} | tee -a "$RESULTS"

  printf "\n" | tee -a "$RESULTS"

  printf "\n"
}

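# Default model set: <HF repo> <GGUF file>.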
run_model "ggml-org/gpt-oss-20b-GGUF"                       "gpt-oss-20b-mxfp4.gguf"
run_model "ggml-org/gpt-oss-120b-GGUF"                      "gpt-oss-120b-mxfp4-00001-of-00003.gguf"
run_model "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF" "qwen3-coder-30b-a3b-instruct-q8_0.gguf"
run_model "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF"             "qwen2.5-coder-7b-q8_0.gguf"
run_model "ggml-org/gemma-3-4b-it-qat-GGUF"                 "gemma-3-4b-it-qat-Q4_0.gguf"

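# Optional extra models: one "<HF repo> <GGUF file>" pair per line.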
if [[ -f models-extra.txt ]]; then
    while read -r HFR HFF; do
        [[ -z "$HFR" ]] && continue
        run_model "$HFR" "$HFF"
    done < models-extra.txt
fi

printf "\n=====================================\n"
printf "\n"

cat "$RESULTS"

printf "\n"
printf "Done! Results are written to %s\n" "$RESULTS"
printf "\n"