
Commit 3e5647f

Update on "[ExecuTorch] Arm Ethos: Option to be verbose for Vela"

Better to have the output than to have to rerun it.

Differential Revision: [D69503726](https://our.internmc.facebook.com/intern/diff/D69503726/)

cc freddan80 per zingo oscarandersson8218

[ghstack-poisoned]

2 parents 8374b9e + 7d7f090
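
For context, the Arm backend compiles delegated graphs with the ethos-u-vela tool, and the option this commit adds surfaces Vela's diagnostic output instead of discarding it. A rough sketch of the equivalent manual invocation, assuming the ethos-u-vela CLI and its --verbose-all flag (both assumptions, not taken from this diff):

    # Assumes ethos-u-vela is installed and supports --verbose-all.
    # Keep the full compiler diagnostics so a failed run does not have
    # to be repeated just to see what happened.
    vela model.tosa --accelerator-config ethos-u55-128 --verbose-all 2>&1 | tee vela.log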

File tree: 278 files changed, +7980, -3228 lines


.ci/scripts/build_llama_android.sh

Lines changed: 8 additions & 0 deletions
@@ -10,6 +10,12 @@ set -exu
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+which "${PYTHON_EXECUTABLE}"
+CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')"
+
 install_executorch_and_backend_lib() {
   echo "Installing executorch and xnnpack backend"
   clean_executorch_install_folders
@@ -22,6 +28,7 @@ install_executorch_and_backend_lib() {
     -DANDROID_ABI="${ANDROID_ABI}" \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
     -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
@@ -47,6 +54,7 @@ build_llama_runner() {
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
     -Bcmake-android-out/examples/models/llama examples/models/llama
 
   cmake --build cmake-android-out/examples/models/llama -j4 --config Release
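
The new CMAKE_PREFIX_PATH points CMake at the installed torch package directory, whose share/cmake subtree carries TorchConfig.cmake, so find_package(Torch) resolves without a hand-written path. A quick sanity check of what the one-liner prints (the site-packages path below is illustrative):

    $ python3 -c 'import torch as _; print(_.__path__[0])'
    /path/to/venv/lib/python3.10/site-packages/torch
    $ ls /path/to/venv/lib/python3.10/site-packages/torch/share/cmake
    ATen  Caffe2  Torch  ...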

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 0 deletions
@@ -154,6 +154,7 @@ cmake_install_executorch_libraries() {
   rm -rf cmake-out
   retry cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')" \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \

.ci/scripts/test_llava.sh

Lines changed: 7 additions & 3 deletions
@@ -30,9 +30,11 @@ fi
 NPROC=8
 if hash nproc &> /dev/null; then NPROC=$(nproc); fi
 
+python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')
+CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')"
 EXECUTORCH_COMMON_CMAKE_ARGS=" \
   -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
   -DEXECUTORCH_ENABLE_LOGGING=ON \
   -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
   -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -46,6 +48,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \
 cmake_install_executorch_libraries() {
   cmake \
     ${EXECUTORCH_COMMON_CMAKE_ARGS} \
+    "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}" \
     -B${BUILD_DIR} .
 
   cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE}
@@ -56,6 +59,7 @@ cmake_install_executorch_libraries_for_android() {
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI=arm64-v8a \
     ${EXECUTORCH_COMMON_CMAKE_ARGS} \
+    "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}" \
     -B${BUILD_DIR} .
 
   cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE}
@@ -76,7 +80,7 @@ cmake_build_llava_runner() {
 
   cmake \
     ${LLAVA_COMMON_CMAKE_ARGS} \
-    -DCMAKE_PREFIX_PATH="$python_lib" \
+    -DCMAKE_PREFIX_PATH="$python_lib;${CMAKE_PREFIX_PATH}" \
    -B${BUILD_DIR}/${dir} \
    ${dir}
 
@@ -92,7 +96,7 @@ cmake_build_llava_runner_for_android() {
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI=arm64-v8a \
     ${LLAVA_COMMON_CMAKE_ARGS} \
-    -DCMAKE_PREFIX_PATH="$python_lib" \
+    -DCMAKE_PREFIX_PATH="$python_lib;${CMAKE_PREFIX_PATH}" \
     -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \
     -B${BUILD_DIR}/${dir} \
     ${dir}
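
Appending ";${CMAKE_PREFIX_PATH}" works because CMake treats CMAKE_PREFIX_PATH as a semicolon-separated list and searches the prefixes left to right. A minimal sketch with illustrative paths:

    # Both prefixes are searched by find_package(): site-packages first,
    # then the torch package directory itself.
    cmake -DCMAKE_PREFIX_PATH="/opt/venv/lib/python3.10/site-packages;/opt/venv/lib/python3.10/site-packages/torch" -B build .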

.ci/scripts/test_model.sh

Lines changed: 3 additions & 2 deletions
@@ -50,10 +50,12 @@ prepare_artifacts_upload() {
 
 build_cmake_executor_runner() {
   echo "Building executor_runner"
+  CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')"
   rm -rf ${CMAKE_OUTPUT_DIR}
   cmake -DCMAKE_BUILD_TYPE=Debug \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
     -B${CMAKE_OUTPUT_DIR} .
 
   cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug
@@ -98,8 +100,7 @@ test_model() {
 
 build_cmake_xnn_executor_runner() {
   echo "Building xnn_executor_runner"
-  SITE_PACKAGES="$(${PYTHON_EXECUTABLE} -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
-  CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch"
+  CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')"
 
   (rm -rf ${CMAKE_OUTPUT_DIR} \
     && mkdir ${CMAKE_OUTPUT_DIR} \
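
The replaced lookup went through distutils.sysconfig, which is deprecated since Python 3.10 and removed in 3.12; asking torch for its own package directory avoids that dependency. The two approaches side by side (a sketch, not part of the diff):

    # Old: derive site-packages via distutils, then append /torch.
    SITE_PACKAGES="$(python3 -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
    CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch"

    # New: let the installed torch package report its own location.
    CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')"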

.ci/scripts/test_phi_3_mini.sh

Lines changed: 4 additions & 0 deletions
@@ -22,8 +22,10 @@ NPROC=8
 if hash nproc &> /dev/null; then NPROC=$(nproc); fi
 
 cmake_install_executorch_libraries() {
+  CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')"
   cmake -DPYTHON_EXECUTABLE=python \
     -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
+    -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
     -DEXECUTORCH_ENABLE_LOGGING=1 \
     -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -39,8 +41,10 @@ cmake_install_executorch_libraries() {
 }
 
 cmake_build_phi_3_mini() {
+  CMAKE_PREFIX_PATH="$(python3 -c 'import torch as _; print(_.__path__[0])')"
   cmake -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
     -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
+    -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
     -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
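
The same torch-path one-liner now appears in several of these scripts. A hypothetical consolidation, not part of this commit, could live in .ci/scripts/utils.sh:

    # Hypothetical helper (illustrative only): resolve the torch package
    # directory once so callers can reuse it as CMAKE_PREFIX_PATH.
    torch_cmake_prefix() {
      "${PYTHON_EXECUTABLE:-python3}" -c 'import torch as _; print(_.__path__[0])'
    }

    # Usage: cmake -DCMAKE_PREFIX_PATH="$(torch_cmake_prefix)" ...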

(new file; name not shown in this view)

Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
+export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
+export PYTHONPATH=".."
+cp schema/program.fbs exir/_serialize/program.fbs
+cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+which "${PYTHON_EXECUTABLE}"
+
+# Although static llama CI does not require graphviz, it is required by test_qnn_delegate.py
+pip install graphviz
+
+# Download stories llama110m artifacts
+download_stories_model_artifacts
+echo "Creating tokenizer.bin"
+$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
+
+set +e
+# Compile only, as weight sharing is not applicable on x86
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only
+exit_code1=$?
+
+# Check accuracy with weight sharing disabled, since x86 does not support weight sharing.
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --enable_x86_64
+exit_code2=$?
+
+# Check the exit codes and print messages
+if [ $exit_code1 -ne 0 ]; then
+  echo "Static Llama compile only with weight sharing test failed. $exit_code1."
+fi
+
+if [ $exit_code2 -ne 0 ]; then
+  echo "Static Llama accuracy test failed. $exit_code2."
+fi
+
+# Return failure if either program failed
+if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ]; then
+  exit 1
+else
+  exit 0
+fi
+set -e
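
The script disables errexit so both test invocations run even if the first fails, then folds the two exit codes into one result. The pattern in isolation (step names are placeholders, not from this diff):

    set +e              # keep going past individual failures
    ./step_one; rc1=$?  # hypothetical first step
    ./step_two; rc2=$?  # hypothetical second step
    set -e

    # Succeed only if both steps succeeded.
    [ "$rc1" -eq 0 ] && [ "$rc2" -eq 0 ]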

.ci/scripts/utils.sh

Lines changed: 1 addition & 0 deletions
@@ -136,6 +136,7 @@ cmake_install_executorch_lib() {
   clean_executorch_install_folders
   retry cmake -DBUCK2="$BUCK" \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_PREFIX_PATH="$($PYTHON_EXECUTABLE -c 'import torch as _; print(_.__path__[0])')" \
     -DCMAKE_BUILD_TYPE=Release \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .

.github/scripts/propose_ghstack_orig_pr.py

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ def create_prs_for_orig_branch(pr_stack: List[int], repo: Repository):
     # gh/user/x/orig is the clean diff between gh/user/x/base <- gh/user/x/head
     orig_branch_merge_head = pr.base.ref.replace("base", "orig")
     bot_metadata = f"""This PR was created by the merge bot to help merge the original PR into the main branch.
-ghstack PR number: https://github.com/pytorch/executorch/pull/{pr.number}
+ghstack PR number: https://github.com/pytorch/executorch/pull/{pr.number} by @{pr.user.login}
 ^ Please use this as the source of truth for the PR details, comments, and reviews
 ghstack PR base: https://github.com/pytorch/executorch/tree/{pr.base.ref}
 ghstack PR head: https://github.com/pytorch/executorch/tree/{pr.head.ref}
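
With a hypothetical PR number and author (both placeholders, not from this diff), the rendered metadata now begins:

    This PR was created by the merge bot to help merge the original PR into the main branch.
    ghstack PR number: https://github.com/pytorch/executorch/pull/12345 by @example-user
    ^ Please use this as the source of truth for the PR details, comments, and reviews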

.github/workflows/android-perf.yml

Lines changed: 2 additions & 0 deletions
@@ -222,6 +222,7 @@ jobs:
           --preq_mode 8da4w_output_8da8w \
           --preq_group_size 32 \
           --max_seq_length 2048 \
+          --max_context_length 2048 \
           --output_name "${OUT_ET_MODEL_NAME}.pte" \
           -kv \
           -d fp32 \
@@ -253,6 +254,7 @@ jobs:
           --xnnpack-extended-ops \
           -d fp32 \
           --max_seq_length 2048 \
+          --max_context_length 2048 \
           --output_name "${OUT_ET_MODEL_NAME}.pte" \
           --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
         ls -lh "${OUT_ET_MODEL_NAME}.pte"

.github/workflows/apple-perf.yml

Lines changed: 2 additions & 0 deletions
@@ -233,6 +233,7 @@ jobs:
           --preq_mode 8da4w_output_8da8w \
           --preq_group_size 32 \
           --max_seq_length 2048 \
+          --max_context_length 2048 \
           --output_name "${OUT_ET_MODEL_NAME}.pte" \
           -kv \
           -d fp32 \
@@ -264,6 +265,7 @@ jobs:
           --xnnpack-extended-ops \
           -d fp32 \
           --max_seq_length 2048 \
+          --max_context_length 2048 \
           --output_name "${OUT_ET_MODEL_NAME}.pte" \
           --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
         ls -lh "${OUT_ET_MODEL_NAME}.pte"
