
Commit 13004c2

Commit message: "up"
Parent: e2a3abb

2 files changed: +69 −38 lines

File tree:
  .ci/scripts/test_torchao_huggingface_checkpoints.sh
  .github/workflows/trunk.yml


.ci/scripts/test_torchao_huggingface_checkpoints.sh

Lines changed: 60 additions & 31 deletions
@@ -1,14 +1,41 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
-MODEL_NAME=${1:-}
+# -------------------------
+# Args / flags
+# -------------------------
+TEST_WITH_RUNNER=0
+MODEL_NAME=""
 
-if [[ -z "$MODEL_NAME" ]]; then
-  echo "Usage: $0 <model_name>"
+# Parse args
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 <model_name> [--test_with_runner]"
   echo "Supported model_name values: qwen3_4b, phi_4_mini"
   exit 1
 fi
 
+MODEL_NAME="$1"
+shift
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --test_with_runner)
+      TEST_WITH_RUNNER=1
+      ;;
+    -h|--help)
+      echo "Usage: $0 <model_name> [--test_with_runner]"
+      echo "  model_name: qwen3_4b | phi_4_mini"
+      echo "  --test_with_runner: build ET + run llama_main to sanity-check the export"
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
 if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
   PYTHON_EXECUTABLE=python3
 fi
@@ -77,33 +104,35 @@ if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then
 fi
 
 # Install ET with CMake
-cmake -DPYTHON_EXECUTABLE=python \
-  -DCMAKE_INSTALL_PREFIX=cmake-out \
-  -DEXECUTORCH_ENABLE_LOGGING=1 \
-  -DCMAKE_BUILD_TYPE=Release \
-  -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-  -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
-  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-  -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-  -DEXECUTORCH_BUILD_XNNPACK=ON \
-  -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-  -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-  -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
-  -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-  -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-  -Bcmake-out .
-cmake --build cmake-out -j16 --config Release --target install
-
-# Install llama runner
-cmake -DPYTHON_EXECUTABLE=python \
-  -DCMAKE_BUILD_TYPE=Release \
-  -Bcmake-out/examples/models/llama \
-  examples/models/llama
-cmake --build cmake-out/examples/models/llama -j16 --config Release
-
-# Run the model
-./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time,"
+if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
+  echo "[runner] Building and testing llama_main ..."
+  cmake -DPYTHON_EXECUTABLE=python \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DEXECUTORCH_ENABLE_LOGGING=1 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
+    -Bcmake-out .
+  cmake --build cmake-out -j16 --config Release --target install
+
+  # Install llama runner
+  cmake -DPYTHON_EXECUTABLE=python \
+    -DCMAKE_BUILD_TYPE=Release \
+    -Bcmake-out/examples/models/llama \
+    examples/models/llama
+  cmake --build cmake-out/examples/models/llama -j16 --config Release
+
+  # Run the model
+  ./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time,"
+fi
 
 # Clean up
-rm pytorch_model_converted.bin
-rm $MODEL_OUT
+rm -f pytorch_model_converted.bin "$MODEL_OUT"
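
With this change the heavyweight ExecuTorch build and llama_main run become opt-in rather than unconditional, and switching to `rm -f` keeps the cleanup step from aborting the script (which runs under `set -euo pipefail`) if either file is missing. A usage sketch for the updated script, using the two model names it supports:

  # Export and size-check the checkpoint only:
  bash .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini

  # Additionally build ExecuTorch + llama_main and smoke-test the exported model:
  bash .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b --test_with_runner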

.github/workflows/trunk.yml

Lines changed: 9 additions & 7 deletions
@@ -593,13 +593,16 @@ jobs:
       contents: read
     strategy:
       matrix:
-        mode: [xnnpack+custom]
-        runner: [linux.2xlarge]
-        docker-image: [executorch-ubuntu-22.04-clang12]
+        model: [qwen3_4b, phi_4_mini]
+        include:
+          - model: qwen3_4b
+            test_with_runner: true
+          - model: phi_4_mini
+            test_with_runner: false
       fail-fast: false
     with:
-      runner: ${{ matrix.runner }}
-      docker-image: ci-image:${{ matrix.docker-image }}
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 900
@@ -611,8 +614,7 @@ jobs:
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
       pip install -U "huggingface_hub[cli]"
 
-      bash .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b
-      bash .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini
+      bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.test_with_runner && '--test_with_runner' || '' }}
 
       # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
       # test-llava-runner-macos:
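
The expression `${{ matrix.test_with_runner && '--test_with_runner' || '' }}` appends the flag only for matrix entries that opt in, so the two sequential script calls become two parallel matrix jobs. A hand-expanded sketch of the commands each job runs, assuming the matrix values above:

  # model: qwen3_4b (test_with_runner: true)
  bash .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b --test_with_runner

  # model: phi_4_mini (test_with_runner: false)
  bash .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini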
