Update on "[Executorch] Add quantized kv cache to oss ci"

kimishpatel · kimishpatel · commit 10d02926ac09 · 2024-11-26T21:22:42.000-08:00
Fixes to make sure the quantized KV cache works in OSS. Differential Revision: [D66269487](https://our.internmc.facebook.com/intern/diff/D66269487/) [ghstack-poisoned]
diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh
@@ -12,19 +12,13 @@ build_qnn_backend() {
   echo "Start building qnn backend."
   export ANDROID_NDK_ROOT=/opt/ndk
   export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
-  export EXECUTORCH_SRC_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
-  export EXECUTORCH_INSTALL_ROOT="$(python -c 'import executorch; print(executorch.__path__[0])')"
-  if [ "$EXECUTORCH_INSTALL_ROOT" == "" ]; then
-    echo "Failed to find where executorch package is installed."
-    echo "import executorch failed"
-    exit -1
-  fi
+  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
 
   bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
 }
 
 set_up_aot() {
-  cd $EXECUTORCH_SRC_ROOT
+  cd $EXECUTORCH_ROOT
   if [ ! -d "cmake-out" ]; then
       mkdir cmake-out
   fi
@@ -42,8 +36,8 @@ set_up_aot() {
   cmake --build $PWD --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j$(nproc)
   # install Python APIs to correct import path
   # The filename might vary depending on your Python and host version.
-  cp -f backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_INSTALL_ROOT/backends/qualcomm/python
-  cp -f backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_INSTALL_ROOT/backends/qualcomm/python
+  cp -f backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_ROOT/backends/qualcomm/python
+  cp -f backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_ROOT/backends/qualcomm/python
   popd
 
   # Workaround for fbs files in exir/_serialize
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
@@ -113,6 +113,7 @@ if [[ "${MODE}" =~ .*qnn.* ]]; then
   export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
   export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
   export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
+  export PYTHONPATH=".."
   cp schema/program.fbs exir/_serialize/program.fbs
   cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
   cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
@@ -152,8 +152,10 @@ test_model_with_qnn() {
   source "$(dirname "${BASH_SOURCE[0]}")/build-qnn-sdk.sh"
   echo "ANDROID_NDK_ROOT: $ANDROID_NDK_ROOT"
   echo "QNN_SDK_ROOT: $QNN_SDK_ROOT"
+  echo "EXECUTORCH_ROOT: $EXECUTORCH_ROOT"
 
   export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
+  export PYTHONPATH=$EXECUTORCH_ROOT/..
 
   if [[ "${MODEL_NAME}" == "dl3" ]]; then
     EXPORT_SCRIPT=deeplab_v3
diff --git a/examples/models/llama/source_transformation/quantized_kv_cache.py b/examples/models/llama/source_transformation/quantized_kv_cache.py
@@ -11,6 +11,8 @@
 import torch.nn as nn
 from executorch.examples.models.llama.llama_transformer import KVCache
 
+# This is needed to ensure that custom ops are registered
+from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
 from executorch.extension.llm.custom_ops import custom_ops  # noqa: F401
 from torch.ao.quantization.fx._decomposed import quantized_decomposed_lib  # noqa: F401
 
@@ -23,7 +25,10 @@
 
     import executorch
 
-    executorch_package_path = executorch.__path__[0]
+    # Ideally the package is installed in only one location, but use of
+    # PYTHONPATH can result in multiple locations.
+    # ATM this is mainly used in CI for the qnn runner. Will need to revisit this.
+    executorch_package_path = executorch.__path__[-1]
     libs = list(
         glob.glob(
             f"{executorch_package_path}/**/libquantized_ops_aot_lib.*", recursive=True
diff --git a/extension/llm/custom_ops/custom_ops.py b/extension/llm/custom_ops/custom_ops.py
@@ -26,7 +26,10 @@
 
     import executorch
 
-    executorch_package_path = executorch.__path__[0]
+    # Ideally the package is installed in only one location, but use of
+    # PYTHONPATH can result in multiple locations.
+    # ATM this is mainly used in CI for the qnn runner. Will need to revisit this.
+    executorch_package_path = executorch.__path__[-1]
     logging.info(f"Looking for libcustom_ops_aot_lib.so in {executorch_package_path }")
     libs = list(
         glob.glob(