Add --threshold option to QNN static llama eval script and pass it from CI

cccclai · cccclai · commit f673dc5fc610 · 2025-09-23T16:54:12.000-07:00
diff --git a/.ci/scripts/test_qnn_static_llama_eval.sh b/.ci/scripts/test_qnn_static_llama_eval.sh
@@ -27,6 +27,29 @@ fi
 
 which "${PYTHON_EXECUTABLE}"
 
+# -------------------------------
+# Parse args: --flags "<extra eval flags>" | --threshold <value>
+# -------------------------------
+EXTRA_FLAGS=""
+THRESHOLD=62.0  # default fallback
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --flags | --threshold)
+      # Both options need a value: `shift 2` fails and shifts nothing without one.
+      if [[ $# -lt 2 ]]; then
+        echo "Missing value for option: $1" >&2; exit 1
+      fi
+      if [[ "$1" == "--flags" ]]; then EXTRA_FLAGS="$2"; else THRESHOLD="$2"; fi
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
 # Config
 PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
 MODEL="qwen2_5-0_5b"
@@ -39,7 +62,7 @@ EXTRA_FLAGS="$@"
 # Run command and capture *both stdout and stderr*
 LOG_FILE="eval_${MODEL}_$(date +%Y%m%d_%H%M%S).log"
 
-echo ">>> Running evaluation..."
+echo ">>> Running evaluation with flags: $EXTRA_FLAGS | threshold: $THRESHOLD"
 $PYTHON_EXECUTABLE -m executorch.examples.qualcomm.oss_scripts.llama.eval_llama_qnn \
   --decoder_model "$MODEL" \
   --quant_linear_only \
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -639,7 +639,9 @@ jobs:
 
         # Test static llama weight sharing and accuracy
         echo ">>> Running config: ${{ matrix.config.name }}"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama_eval.sh ${{ matrix.config.flags }}
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama_eval.sh \
+          ${{ matrix.config.flags }} \
+          --threshold ${{ matrix.config.threshold }}
 
 
   # test-qnn-models-linux: