pytorch
diff --git a/.ci/docker/ci_commit_pins/optimum-executorch.txt b/.ci/docker/ci_commit_pins/optimum-executorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh
Lines changed: 2 additions & 0 deletions
diff --git a/.ci/scripts/build_llama_android.sh b/.ci/scripts/build_llama_android.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/scripts/setup-qnn-deps.sh b/.ci/scripts/setup-qnn-deps.sh
Lines changed: 2 additions & 42 deletions
diff --git a/.ci/scripts/test_ane_static_llama.sh b/.ci/scripts/test_ane_static_llama.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
Lines changed: 1 addition & 0 deletions
diff --git a/.ci/scripts/test_llama_torchao_lowbit.sh b/.ci/scripts/test_llama_torchao_lowbit.sh
Lines changed: 2 additions & 0 deletions
diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh
Lines changed: 2 additions & 0 deletions
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
Lines changed: 17 additions & 6 deletions
@@ -1 +1 @@
-a3942627f5ac048e06b4b1d703b0a6a53bf6da5b
+eea657ddbdeb1118943a92fb73c289985c3ee1ba
@@ -1 +1 @@
-ab43fe4bdf5ccd82897f0e982c451a0127bd175e
+6fc0ad22f0a07b6f38d138861c56a765d5a9bb02
@@ -33,6 +33,8 @@ set_up_aot() {
       -DEXECUTORCH_BUILD_DEVTOOLS=ON \
       -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
       -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
       -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
       -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
       -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
 
@@ -19,7 +19,7 @@ install_executorch_and_backend_lib() {
   echo "Installing executorch and xnnpack backend"
   clean_executorch_install_folders
   mkdir cmake-android-out
-  ANDROID_NDK=/opt/ndk
+  ANDROID_NDK=${ANDROID_NDK:-/opt/ndk}
   BUCK2=buck2
   ANDROID_ABI=arm64-v8a
   cmake --preset llm \
 
@@ -7,47 +7,7 @@
 
 set -ex
 
-verify_pkg_installed() {
-  echo $(dpkg-query -W --showformat='${Status}\n' $1|grep "install ok installed")
-}
+source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
 
-install_qnn() {
-  echo "Start installing qnn."
-  QNN_INSTALLATION_DIR=/tmp/qnn
-  mkdir -p "${QNN_INSTALLATION_DIR}"
-
-  curl -Lo /tmp/v2.28.0.24.10.29.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip"
-  echo "Finishing downloading qnn sdk."
-  unzip -qo /tmp/v2.28.0.24.10.29.zip -d /tmp
-  echo "Finishing unzip qnn sdk."
-
-
-  # Print the content for manual verification
-  ls -lah "/tmp/qairt"
-  mv "/tmp/qairt"/* "${QNN_INSTALLATION_DIR}"
-  echo "Finishing installing qnn '${QNN_INSTALLATION_DIR}' ."
-
-  ls -lah "${QNN_INSTALLATION_DIR}"
-}
-
-setup_libc++() {
-  clang_version=$1
-  sudo apt-get update
-  pkgs_to_check=("libc++-${clang_version}-dev")
-  j=0
-  while [ $j -lt ${#pkgs_to_check[*]} ]; do
-    install_status=$(verify_pkg_installed ${pkgs_to_check[$j]})
-    if [ "$install_status" == "" ]; then
-      sudo apt-get install -y ${pkgs_to_check[$j]}
-      if [[ $? -ne 0 ]]; then
-        echo "ERROR: Failed to install required packages for libc++"
-        exit 1
-      fi
-    fi
-    j=$(( $j +1));
-  done
-}
-
-# This needs to match with the clang version from the Docker image
-setup_libc++ 12
+setup_libcpp 12
 install_qnn
@@ -28,6 +28,6 @@ pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama
 # Download stories llama110m artifacts
 download_stories_model_artifacts
 
-python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w
+python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w --embedding-quantize 4,32
 
 popd
@@ -150,6 +150,7 @@ cmake_install_executorch_libraries() {
     echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
     rm -rf cmake-out
     retry cmake --preset llm \
+        -DBUILD_TESTING=OFF \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
         -DEXECUTORCH_BUILD_QNN="$QNN" \
 
@@ -29,6 +29,8 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_ENABLE_LOGGING=1 \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
 
@@ -38,6 +38,8 @@ EXECUTORCH_COMMON_CMAKE_ARGS="                      \
         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON      \
         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON      \
         -DEXECUTORCH_BUILD_KERNELS_LLM=ON        \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON     \
 
@@ -232,21 +232,24 @@ test_model_with_qnn() {
 # @param should_test If true, build and test the model using the coreml_executor_runner.
 test_model_with_coreml() {
   local should_test="$1"
+  local test_with_pybindings="$2"
+  local dtype="$3"
 
   if [[ "${BUILD_TOOL}" != "cmake" ]]; then
     echo "coreml only supports cmake."
     exit 1
   fi
 
-  DTYPE=float16
+  RUN_WITH_PYBINDINGS=""
+  if [[ "${test_with_pybindings}" == true ]]; then
+    echo "Running with pybindings"
+    export RUN_WITH_PYBINDINGS="--run_with_pybindings"
+  fi
 
-  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}" --use_partitioner
+  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${dtype}" --use_partitioner ${RUN_WITH_PYBINDINGS}
   EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
 
   if [ -n "$EXPORTED_MODEL" ]; then
-    EXPORTED_MODEL_WITH_DTYPE="${EXPORTED_MODEL%.pte}_${DTYPE}.pte"
-    mv "$EXPORTED_MODEL" "$EXPORTED_MODEL_WITH_DTYPE"
-    EXPORTED_MODEL="$EXPORTED_MODEL_WITH_DTYPE"
     echo "OK exported model: $EXPORTED_MODEL"
   else
     echo "[error] failed to export model: no .pte file found"
@@ -303,7 +306,15 @@ elif [[ "${BACKEND}" == *"coreml"* ]]; then
   if [[ "${BACKEND}" == *"test"* ]]; then
     should_test_coreml=true
   fi
-  test_model_with_coreml "${should_test_coreml}"
+  test_with_pybindings=false
+  if [[ "${BACKEND}" == *"pybind"* ]]; then
+    test_with_pybindings=true
+  fi
+  dtype=float16
+  if [[ "${BACKEND}" == *"float32"* ]]; then
+    dtype=float32
+  fi
+  test_model_with_coreml "${should_test_coreml}" "${test_with_pybindings}" "${dtype}"
   if [[ $? -eq 0 ]]; then
     prepare_artifacts_upload
   fi
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-a3942627f5ac048e06b4b1d703b0a6a53bf6da5b`
	`1`	`+eea657ddbdeb1118943a92fb73c289985c3ee1ba`
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-ab43fe4bdf5ccd82897f0e982c451a0127bd175e`
	`1`	`+6fc0ad22f0a07b6f38d138861c56a765d5a9bb02`