@@ -5,6 +5,7 @@ set -euxo pipefail
55# Args / flags
66# -------------------------
77TEST_WITH_RUNNER=0
8+ USE_TORCHAO_KERNELS=0
89MODEL_NAME=" "
910
1011# Parse args
@@ -22,10 +23,14 @@ while [[ $# -gt 0 ]]; do
2223 --test_with_runner)
2324 TEST_WITH_RUNNER=1
2425 ;;
26+ --use_torchao_kernels)
27+ USE_TORCHAO_KERNELS=1
28+ ;;
2529 -h|--help)
26- echo " Usage: $0 <model_name> [--test_with_runner]"
30+ echo " Usage: $0 <model_name> [--test_with_runner] [--use_torchao_kernels] "
2731 echo " model_name: qwen3_4b | phi_4_mini"
2832 echo " --test_with_runner: build ET + run llama_main to sanity-check the export"
33+ echo " --use_torchao_kernels: use torchao kernels for linear and tied embedding"
2934 exit 0
3035 ;;
3136 * )
4247
4348MODEL_OUT=model.pte
4449
50+
51+ # Default to XNNPACK
52+ BACKEND_ARGS=" -X --xnnpack-extended-ops"
53+ if [[ " $USE_TORCHAO_KERNELS " -eq 1 ]]; then
54+ BACKEND_ARGS=" --use-torchao-kernels"
55+ fi
56+
4557case " $MODEL_NAME " in
4658 qwen3_4b)
4759 echo " Running Qwen3-4B export..."
@@ -58,12 +70,12 @@ case "$MODEL_NAME" in
5870 --output_name $MODEL_OUT \
5971 -kv \
6072 --use_sdpa_with_kv_cache \
61- -X \
62- --xnnpack-extended-ops \
6373 --max_context_length 1024 \
6474 --max_seq_length 1024 \
75+ --metadata ' {"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
76+ --verbose \
6577 --dtype fp32 \
66- --metadata ' {"get_bos_id":199999, "get_eos_ids":[200020,199999]} '
78+ ${BACKEND_ARGS}
6779 ;;
6880
6981 phi_4_mini)
@@ -81,12 +93,12 @@ case "$MODEL_NAME" in
8193 --output_name $MODEL_OUT \
8294 -kv \
8395 --use_sdpa_with_kv_cache \
84- -X \
85- --xnnpack-extended-ops \
8696 --max_context_length 1024 \
8797 --max_seq_length 1024 \
98+ --metadata ' {"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
99+ --verbose \
88100 --dtype fp32 \
89- --metadata ' {"get_bos_id":199999, "get_eos_ids":[200020,199999]} '
101+ ${BACKEND_ARGS}
90102 ;;
91103
92104 * )
@@ -104,6 +116,10 @@ if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then
104116fi
105117
106118# Install ET with CMake
119+ EXECUTORCH_BUILD_KERNELS_TORCHAO=" OFF"
120+ if [[ " $USE_TORCHAO_KERNELS " -eq 1 ]]; then
121+ EXECUTORCH_BUILD_KERNELS_TORCHAO=" ON"
122+ fi
107123if [[ " $TEST_WITH_RUNNER " -eq 1 ]]; then
108124 echo " [runner] Building and testing llama_main ..."
109125 cmake -DPYTHON_EXECUTABLE=python \
@@ -120,6 +136,7 @@ if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
120136 -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
121137 -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
122138 -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
139+ -DEXECUTORCH_BUILD_KERNELS_TORCHAO=${EXECUTORCH_BUILD_KERNELS_TORCHAO} \
123140 -Bcmake-out .
124141 cmake --build cmake-out -j16 --config Release --target install
125142
0 commit comments