pytorch
diff --git a/‎.ci/docker/ci_commit_pins/optimum-executorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/optimum-executorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/pytorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/build-qnn-sdk.sh‎
Lines changed: 2 additions & 0 deletions b/‎.ci/scripts/build-qnn-sdk.sh‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.ci/scripts/setup-emscripten.sh‎
Lines changed: 8 additions & 0 deletions b/‎.ci/scripts/setup-emscripten.sh‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎.ci/scripts/test_llama.sh‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/test_llama.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/test_llama_torchao_lowbit.sh‎
Lines changed: 2 additions & 0 deletions b/‎.ci/scripts/test_llama_torchao_lowbit.sh‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.ci/scripts/test_llava.sh‎
Lines changed: 2 additions & 0 deletions b/‎.ci/scripts/test_llava.sh‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.ci/scripts/test_model.sh‎
Lines changed: 17 additions & 6 deletions b/‎.ci/scripts/test_model.sh‎
Lines changed: 17 additions & 6 deletions
diff --git a/‎.ci/scripts/test_qnn_static_llama.sh‎
Lines changed: 9 additions & 1 deletion b/‎.ci/scripts/test_qnn_static_llama.sh‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions
@@ -1 +1 @@
-a3942627f5ac048e06b4b1d703b0a6a53bf6da5b
+eea657ddbdeb1118943a92fb73c289985c3ee1ba
@@ -1 +1 @@
-ab43fe4bdf5ccd82897f0e982c451a0127bd175e
+6fc0ad22f0a07b6f38d138861c56a765d5a9bb02
@@ -33,6 +33,8 @@ set_up_aot() {
       -DEXECUTORCH_BUILD_DEVTOOLS=ON \
       -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
       -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
       -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
       -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
       -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
 
@@ -7,6 +7,13 @@
 
 set -ex
 
+# need version >= 17
+install_node() {
+    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash  # fetch the nvm v0.40.3 install script and run it with bash
+    source "$HOME/.nvm/nvm.sh"  # load nvm into the current shell so the `nvm` command below is available
+    nvm install 22  # Node.js 22 satisfies the ">= 17" requirement noted above
+}
+
 install_emscripten() {
     git clone https://github.com/emscripten-core/emsdk.git
     pushd emsdk || return
@@ -16,4 +23,5 @@ install_emscripten() {
     popd || return
 }
 
+install_node
 install_emscripten
@@ -150,6 +150,7 @@ cmake_install_executorch_libraries() {
     echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
     rm -rf cmake-out
     retry cmake --preset llm \
+        -DBUILD_TESTING=OFF \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
         -DEXECUTORCH_BUILD_QNN="$QNN" \
 
@@ -29,6 +29,8 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_ENABLE_LOGGING=1 \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
 
@@ -38,6 +38,8 @@ EXECUTORCH_COMMON_CMAKE_ARGS="                      \
         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON      \
         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON      \
         -DEXECUTORCH_BUILD_KERNELS_LLM=ON        \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON     \
 
@@ -232,21 +232,24 @@ test_model_with_qnn() {
 # @param should_test If true, build and test the model using the coreml_executor_runner.
 test_model_with_coreml() {
   local should_test="$1"
+  local test_with_pybindings="$2"
+  local dtype="$3"
 
   if [[ "${BUILD_TOOL}" != "cmake" ]]; then
     echo "coreml only supports cmake."
     exit 1
   fi
 
-  DTYPE=float16
+  RUN_WITH_PYBINDINGS=""
+  if [[ "${test_with_pybindings}" == true ]]; then
+    echo \"Running with pybindings\"
+    export RUN_WITH_PYBINDINGS="--run_with_pybindings"
+  fi
 
-  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}" --use_partitioner
+  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision ${dtype} --use_partitioner ${RUN_WITH_PYBINDINGS}
   EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
 
   if [ -n "$EXPORTED_MODEL" ]; then
-    EXPORTED_MODEL_WITH_DTYPE="${EXPORTED_MODEL%.pte}_${DTYPE}.pte"
-    mv "$EXPORTED_MODEL" "$EXPORTED_MODEL_WITH_DTYPE"
-    EXPORTED_MODEL="$EXPORTED_MODEL_WITH_DTYPE"
     echo "OK exported model: $EXPORTED_MODEL"
   else
     echo "[error] failed to export model: no .pte file found"
@@ -303,7 +306,15 @@ elif [[ "${BACKEND}" == *"coreml"* ]]; then
   if [[ "${BACKEND}" == *"test"* ]]; then
     should_test_coreml=true
   fi
-  test_model_with_coreml "${should_test_coreml}"
+  test_with_pybindings=false
+  if [[ "${BACKEND}" == *"pybind"* ]]; then
+    test_with_pybindings=true
+  fi
+  dtype=float16
+  if [[ "${BACKEND}" == *"float32"* ]]; then
+    dtype=float32
+  fi
+  test_model_with_coreml "${should_test_coreml}" "${test_with_pybindings}" "${dtype}"
   if [[ $? -eq 0 ]]; then
     prepare_artifacts_upload
   fi
 
@@ -41,6 +41,10 @@ exit_code1=$?
 $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --llama_artifacts . --enable_x86_64
 exit_code2=$?
 
+# Check BC
+bash backends/qualcomm/bc/test_qnn_static_llama_bc.sh
+exit_code3=$?
+
 # Check the exit codes and print messages
 if [ $exit_code1 -ne 0 ]; then
     echo "Static Llama compile only with weight sharing test failed. $exit_code1."
@@ -50,8 +54,12 @@ if [ $exit_code2 -ne 0 ]; then
     echo "Static Llama accuracy test failed. $exit_code2."
 fi
 
+if [ $exit_code3 -ne 0 ]; then
+    echo "Static Llama BACKWARD COMPATIBILITY test failed. $exit_code3."
+fi
+
 # Return failure if either program failed
-if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ]; then
+if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ] || [ $exit_code3 -ne 0 ]; then
     exit 1
 else
     exit 0
 
@@ -355,8 +355,8 @@ jobs:
                 "--recipe" "xnnpack"
                 "--use_custom_sdpa"
                 "--use_custom_kv_cache"
-                "--qlinear"
-                "--qembedding"
+                "--qlinear" "8da4w"
+                "--qembedding" "8w"
                 "--output_dir" ".."
               )
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-a3942627f5ac048e06b4b1d703b0a6a53bf6da5b`
	`1`	`+eea657ddbdeb1118943a92fb73c289985c3ee1ba`
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-ab43fe4bdf5ccd82897f0e982c451a0127bd175e`
	`1`	`+6fc0ad22f0a07b6f38d138861c56a765d5a9bb02`