pytorch
diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
Lines changed: 3 additions & 3 deletions
diff --git a/benchmark.py b/benchmark.py
Lines changed: 211 additions & 0 deletions
diff --git a/build_xnnpack.sh b/build_xnnpack.sh
Lines changed: 39 additions & 0 deletions
@@ -253,7 +253,7 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
 
   pybind11_extension(PyQnnManagerAdaptor)
   pybind11_extension(PyQnnWrapperAdaptor)
-  if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo)
+  # Quote the expansion: with CMAKE_BUILD_TYPE unset, the bare ${...} form
+  # expands to nothing and leaves a malformed `NOT MATCHES ...` condition.
+  if(NOT MSVC AND NOT "${CMAKE_BUILD_TYPE}" MATCHES "RelWithDebInfo")
     # Strip unnecessary sections of the binary
     pybind11_strip(PyQnnManagerAdaptor)
     pybind11_strip(PyQnnWrapperAdaptor)
 
@@ -30,7 +30,7 @@ CMAKE_X86_64="build-x86"
 BUILD_AARCH64="true"
 CMAKE_AARCH64="build-android"
 CLEAN="true"
-BUILD_TYPE="Debug"
+BUILD_TYPE="RelWithDebInfo"
 BUILD_JOB_NUMBER="16"
 
 if [ -z PYTHON_EXECUTABLE ]; then
@@ -71,7 +71,7 @@ if [ "$BUILD_AARCH64" = true ]; then
         rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT
     else
         # Force rebuild flatccrt for the correct platform
-        cd $BUILD_ROOT/devtools && make clean
+        cd $BUILD_ROOT/third-party/flatcc && make clean
     fi
 
     cd $BUILD_ROOT
@@ -116,7 +116,7 @@ if [ "$BUILD_X86_64" = true ]; then
         rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT
     else
         # Force rebuild flatccrt for the correct platform
-        cd $BUILD_ROOT/devtools && make clean
+        cd $BUILD_ROOT/third-party/flatcc && make clean
     fi
 
     cd $BUILD_ROOT
 
@@ -0,0 +1,211 @@
+import argparse
+import os
+import subprocess
+
+# Root of the QNN SDK, taken from the environment; None when QNN_SDK_ROOT is
+# not exported. Used to locate the QNN libraries pushed for the "qnn" backend.
+qnn_sdk = os.getenv("QNN_SDK_ROOT")
+# Version string substituted into the hexagon-v<arch> directory and the
+# libQnnHtpV<arch>Skel.so / libQnnHtpV<arch>Stub.so file names.
+htp_arch = "79"
+# Directory on the device that is recreated for every run and receives all
+# pushed artifacts.
+workspace = "/data/local/tmp/et_ga_benchmark"
+# Name of the memory sampling shell script that is written on the host and
+# pushed to the device.
+memory_script_file = "peak_memory.sh"
+# Name of the statistics file pulled back from the device workspace and printed.
+perf_file = "statistics.txt"
+
+
+def get_artifacts(backend, pte_path):
+    """Write the memory sampling script and list the files to push to the device.
+
+    Args:
+        backend: "qnn" or "xnn"; selects the host build directory and runner.
+        pte_path: host path of the .pte file to benchmark.
+
+    Returns:
+        List of host paths to push: the .pte file, the runner binary, the
+        memory sampling script and, for "qnn", the QNN SDK and backend
+        libraries.
+
+    Raises:
+        KeyError: if backend is neither "qnn" nor "xnn".
+        EnvironmentError: if backend is "qnn" and QNN_SDK_ROOT is not set.
+    """
+    build_dir = {
+        "qnn": "build-android",
+        "xnn": "build-xnnpack",
+    }[backend]
+    if backend == "qnn" and not qnn_sdk:
+        # Without the SDK root every library path would start with "None/".
+        raise EnvironmentError("QNN_SDK_ROOT must be set for the 'qnn' backend")
+
+    # The script starts the command it is given in the background, then polls
+    # the process until it exits, recording peak and average DMA + PSS usage.
+    memory_script = """$@ 2> /dev/null &
+
+PROCESS=$1
+PEAK_MEM=0
+SAMPLES=0
+TOTAL=0
+while true; do
+    PID=$(pidof $PROCESS)
+    if [ "$PID" != "" ]; then
+        DMA=$(dmabuf_dump $PID | grep "PROCESS TOTAL" | awk '{ print $3 }')
+        PSS=$(dumpsys meminfo -s $PID | grep "TOTAL PSS" | awk '{ print $3 }')
+        if [ "$PSS" == "" ]; then
+            continue
+        fi
+        CURRENT=$(($DMA+$PSS))
+        # Compare the variable values, not the bare words.
+        if [ "$CURRENT" -gt "$PEAK_MEM" ]; then
+            PEAK_MEM=$CURRENT
+        fi
+        SAMPLES=$(($SAMPLES+1))
+        TOTAL=$(($TOTAL+$CURRENT))
+    else
+        break
+    fi
+done
+
+rm -rf memory_usage.txt
+echo "peak_mem: $PEAK_MEM" >> statistics.txt
+# Avoid dividing by zero when the process exited before any sample was taken.
+if [ "$SAMPLES" -gt 0 ]; then
+    AVG_MEM=$(awk -- 'BEGIN{printf "%.3f", ARGV[1]/ARGV[2]}' "$TOTAL" "$SAMPLES")
+else
+    AVG_MEM=0
+fi
+echo "avg_mem: $AVG_MEM" >> statistics.txt
+    """
+    with open(memory_script_file, "w") as f:
+        f.write(memory_script)
+
+    if backend == "xnn":
+        return [
+            pte_path,
+            f"{build_dir}/backends/xnnpack/xnn_executor_runner",
+            memory_script_file,
+        ]
+
+    qnn_lib_dir = f"{qnn_sdk}/lib/aarch64-android"
+    return [
+        pte_path,
+        f"{qnn_lib_dir}/libQnnHtp.so",
+        f"{qnn_sdk}/lib/hexagon-v{htp_arch}/unsigned/libQnnHtpV{htp_arch}Skel.so",
+        f"{qnn_lib_dir}/libQnnHtpV{htp_arch}Stub.so",
+        f"{qnn_lib_dir}/libQnnHtpPrepare.so",
+        f"{qnn_lib_dir}/libQnnSystem.so",
+        f"{build_dir}/backends/qualcomm/libqnn_executorch_backend.so",
+        f"{qnn_lib_dir}/libQnnModelDlc.so",
+        f"{build_dir}/examples/qualcomm/executor_runner/qnn_executor_runner",
+        memory_script_file,
+    ]
+
+
+def get_cmds(backend, pte_path, iteration):
+    """Build the two shell command lines that are run on the device.
+
+    Args:
+        backend: "qnn" or "xnn"; selects the runner binary and the name of
+            its repeat-count flag.
+        pte_path: path of the .pte file; only its base name is used because
+            the commands run from inside the device workspace.
+        iteration: number of inferences passed to the runner.
+
+    Returns:
+        A two-element list: the inference command (with --dump_statistics)
+        and the memory profiling command, which runs the same runner through
+        the memory script without --dump_statistics.
+
+    Raises:
+        KeyError: if backend is neither "qnn" nor "xnn".
+    """
+    # Runner binary name and its repeat-count flag, per backend.
+    runner_name, repeat_flag = {
+        "qnn": ("qnn_executor_runner", "--iteration"),
+        "xnn": ("xnn_executor_runner", "--num_executions"),
+    }[backend]
+
+    model_name = os.path.basename(pte_path)
+    runner_args = f"--model_path {model_name} {repeat_flag} {iteration}"
+    # Both commands enter the workspace and make the runner executable first.
+    prologue = f"cd {workspace} && chmod +x ./{runner_name} &&"
+
+    inference_cmd = f"{prologue} ./{runner_name} {runner_args} --dump_statistics"
+    # do not dump inference metrics during profiling memory
+    memory_cmd = (
+        f"{prologue} chmod +x {memory_script_file} && "
+        f"./{memory_script_file} ./{runner_name} {runner_args}"
+    )
+    return [inference_cmd, memory_cmd]
+
+
+def start_benchmark(artifacts, cmds, device, host):
+    """Push the artifacts, run the commands on the device and print the statistics.
+
+    Args:
+        artifacts: host paths to push into the device workspace.
+        cmds: shell command lines executed in order via `adb shell`.
+        device: adb serial number of the target device.
+        host: adb gateway host name; when falsy, no `-H` option is passed.
+    """
+    if host:
+        adb_prefix = ["adb", "-H", host, "-s", device]
+    else:
+        adb_prefix = ["adb", "-s", device]
+
+    def adb(action):
+        # stdout is discarded and the exit status is not checked.
+        subprocess.run([*adb_prefix, *action], stdout=subprocess.DEVNULL)
+
+    # Start from an empty workspace on the device.
+    adb(["shell", "rm", "-rf", workspace])
+    adb(["shell", "mkdir", "-p", workspace])
+    for artifact in artifacts:
+        adb(["push", artifact, workspace])
+    for cmd in cmds:
+        adb(["shell", cmd])
+
+    # Remove any local statistics file before pulling the new one, then print it.
+    subprocess.run(["rm", "-rf", perf_file], stdout=subprocess.DEVNULL)
+    adb(["pull", f"{workspace}/{perf_file}", "."])
+    with open(perf_file, "r") as stats:
+        print(stats.read())
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, collect the artifacts and
+    # device commands for the chosen backend, then run the benchmark.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-b",
+        "--backend",
+        help="either 'qnn' or 'xnn'",
+        # Reject other values here instead of failing later with a KeyError.
+        choices=["qnn", "xnn"],
+        required=True,
+    )
+    parser.add_argument(
+        "-p",
+        "--pte",
+        help="path to generated .pte file",
+        required=True,
+    )
+    parser.add_argument(
+        "-H",
+        "--host",
+        help="hostname for adb gateway",
+        required=False,
+    )
+    parser.add_argument(
+        "-s",
+        "--device",
+        help="serial number for adb device",
+        required=True,
+    )
+    parser.add_argument(
+        "-i",
+        "--iteration",
+        help="total number of inferences",
+        # Validate on the host so a non-numeric value never reaches the runner.
+        type=int,
+        default=100,
+        required=False,
+    )
+    args = parser.parse_args()
+    start_benchmark(
+        artifacts=get_artifacts(args.backend, args.pte),
+        cmds=get_cmds(args.backend, args.pte, args.iteration),
+        device=args.device,
+        host=args.host,
+    )
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Configure, build and install an Android arm64-v8a build with the XNNPACK
+# backend enabled into ./build-xnnpack.
+#
+# Usage: build_xnnpack.sh [-c|--clean_build] [-d|--debug]
+#   -c, --clean_build  remove the build folder before configuring
+#   -d, --debug        use CMAKE_BUILD_TYPE=Debug (default: Release)
+
+# Stop at the first failing command so that a failed configure step is not
+# followed by a build attempt.
+set -e
+
+if [[ -z "$ANDROID_NDK_ROOT" ]]; then
+  echo "Please export ANDROID_NDK_ROOT=/path/to/ndk"
+  exit 1
+fi
+
+CLEAN_BUILD="false"
+BUILD_FOLDER="build-xnnpack"
+BUILD_TYPE="Release"
+
+# Every option is a single word; the one shift after the case advances to the
+# next argument. Shifting inside the branches as well would skip every second
+# option.
+while [[ "$#" -gt 0 ]]; do
+  case "$1" in
+    -c|--clean_build) CLEAN_BUILD="true";;
+    -d|--debug) BUILD_TYPE="Debug";;
+    *) echo "unknown arg passed: $1"; exit 1;;
+  esac
+  shift
+done
+
+if [ "$CLEAN_BUILD" = true ]; then
+  rm -rf "$BUILD_FOLDER"
+fi
+
+cmake \
+  -DCMAKE_INSTALL_PREFIX="$BUILD_FOLDER" \
+  -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
+  -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake" \
+  -DANDROID_ABI='arm64-v8a' \
+  -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+  -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+  -DEXECUTORCH_BUILD_XNNPACK=ON \
+  -DEXECUTORCH_ENABLE_LOGGING=ON \
+  -DPYTHON_EXECUTABLE=python \
+  -B"$BUILD_FOLDER" .
+
+cmake --build "$BUILD_FOLDER" -j9 --target install --config "$BUILD_TYPE"
+