Qualcomm AI Engine Direct - fix release build issue

haowhsu-quic · web-flow · commit 7f34796b69b9 · 2024-08-10T12:10:42.000-07:00
Differential Revision: D61026686 Pull Request resolved: #4625
diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
@@ -58,6 +58,7 @@ add_compile_options("-Wall" "-Werror" "-Wno-sign-compare")
 # which can be ignored by GNU. So we make it a warning, not an error in GNU.
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
   add_compile_options("-Wno-error=attributes")
+  add_link_options("-flto=auto")
 endif()
 
 if(CMAKE_BUILD_TYPE STREQUAL "Release")
@@ -67,7 +68,6 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
   # --gc-sections is added by torch.
   add_compile_options(
     "-O3" "-ffunction-sections" "-fdata-sections" "-frtti"
-    "-Wno-unused-command-line-argument"
   )
 endif()
 
@@ -259,6 +259,19 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
     pybind11_strip(PyQnnWrapperAdaptor)
   endif()
 
+  if(CMAKE_BUILD_TYPE STREQUAL "Release")
+    # pybind needs exceptions, so Release builds of both Python adaptor
+    # modules get -fexceptions/-frtti (plus -fPIC) applied explicitly.
+    set(_pybind_compile_options
+      -Wno-deprecated-declarations -fPIC -frtti -fexceptions
+    )
+    foreach(_pybind_target PyQnnManagerAdaptor PyQnnWrapperAdaptor)
+      target_compile_options(
+        ${_pybind_target} PUBLIC ${_pybind_compile_options}
+      )
+    endforeach()
+  endif()
+
   add_subdirectory(
     ${QNN_EXECUTORCH_ROOT_DIR}/aot/python
     ${CMAKE_CURRENT_BINARY_DIR}/qnn_executorch/python
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
@@ -64,9 +64,13 @@ if [ "$BUILD_AARCH64" = true ]; then
         echo "Please export ANDROID_NDK_ROOT=/path/to/android_ndkXX"
         exit -1
     fi
+
     BUILD_ROOT=$PRJ_ROOT/$CMAKE_AARCH64
     if [ "$CLEAN" = true ]; then
         rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT
+    else
+        # Force rebuild flatccrt for the correct platform (skipped if sdk/ is absent)
+        [ -d "$BUILD_ROOT/sdk" ] && make -C "$BUILD_ROOT/sdk" clean
     fi
 
     cd $BUILD_ROOT
@@ -103,15 +107,17 @@ if [ "$BUILD_AARCH64" = true ]; then
 fi
 
 if [ "$BUILD_X86_64" = true ]; then
-    # Build python interface
     BUILD_ROOT=$PRJ_ROOT/$CMAKE_X86_64
     if [ "$CLEAN" = true ]; then
         rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT
+    else
+        # Force rebuild flatccrt for the correct platform (skipped if sdk/ is absent)
+        [ -d "$BUILD_ROOT/sdk" ] && make -C "$BUILD_ROOT/sdk" clean
     fi
+
     cd $BUILD_ROOT
-    # TODO: Use CMAKE_BUILD_TYPE=RelWithDebInfo, and handle flatcc issues
     cmake \
-        -DCMAKE_BUILD_TYPE=Debug \
+        -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
         -DCMAKE_INSTALL_PREFIX=$BUILD_ROOT \
         -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
         -DEXECUTORCH_BUILD_QNN=ON \
@@ -131,7 +137,7 @@ if [ "$BUILD_X86_64" = true ]; then
    CMAKE_PREFIX_PATH="${BUILD_ROOT}/lib/cmake/ExecuTorch;${BUILD_ROOT}/third-party/gflags;"
 
    cmake $PRJ_ROOT/$EXAMPLE_ROOT \
-       -DCMAKE_BUILD_TYPE=Debug \
+       -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
        -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
        -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
        -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt
@@ -77,15 +77,16 @@ list(PREPEND _qnn_executor_runner__srcs
 )
 # preprocess llama runner src files
 list(TRANSFORM _qnn_llama_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
-list(FILTER _qnn_llama_runner__srcs EXCLUDE REGEX ".*runner.cpp$")
+# Drops every entry whose path contains /runner/ (checked after the source dir is prepended).
+list(FILTER _qnn_llama_runner__srcs EXCLUDE REGEX ".*(/runner/).*")
 list(PREPEND _qnn_llama_runner__srcs
   ${CMAKE_CURRENT_LIST_DIR}/executor_runner/qnn_llama_runner.cpp
   ${CMAKE_CURRENT_LIST_DIR}/llama2/runner/runner.cpp
   ${CMAKE_CURRENT_LIST_DIR}/llama2/runner/runner.h
 )
 # preprocess qaihub llama runner src files
 list(TRANSFORM _qnn_qaihub_llama_runner__srcs PREPEND "${EXECUTORCH_SOURCE_DIR}/")
-list(FILTER _qnn_qaihub_llama_runner__srcs EXCLUDE REGEX ".*runner.cpp*$")
+list(FILTER _qnn_qaihub_llama_runner__srcs EXCLUDE REGEX ".*(/runner/).*")
 list(PREPEND _qnn_qaihub_llama_runner__srcs
   ${CMAKE_CURRENT_LIST_DIR}/executor_runner/qnn_qaihub_llama_runner.cpp
   ${CMAKE_CURRENT_LIST_DIR}/llama2/qaihub_runner/runner.cpp
@@ -103,9 +104,6 @@ target_link_libraries(
   qnn_executor_runner qnn_executorch_backend full_portable_ops_lib etdump
   ${FLATCCRT_LIB} gflags
 )
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-  target_link_options(qnn_executor_runner PUBLIC -fsanitize=undefined)
-endif()
 
 # build llama runner
 add_executable(qnn_llama_runner ${_qnn_llama_runner__srcs})
diff --git a/examples/qualcomm/llama2/qaihub_runner/runner.cpp b/examples/qualcomm/llama2/qaihub_runner/runner.cpp
@@ -11,18 +11,17 @@
 
 #include <executorch/examples/qualcomm/llama2/qaihub_runner/runner.h>
 #include <executorch/extension/evalue_util/print_evalue.h>
+#include <executorch/extension/llm/runner/util.h>
 #include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <executorch/extension/runner_util/managed_tensor.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
+#include <executorch/runtime/platform/log.h>
 
 #include <ctime>
 #include <memory>
 #include <sstream>
 
-#include <executorch/examples/models/llama2/runner/util.h>
-#include <executorch/runtime/core/exec_aten/exec_aten.h>
-#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
-#include <executorch/runtime/platform/log.h>
-
 #if defined(__aarch64__)
 #include "arm_neon.h"
 #endif
diff --git a/examples/qualcomm/llama2/runner/runner.cpp b/examples/qualcomm/llama2/runner/runner.cpp
@@ -11,18 +11,17 @@
 
 #include <executorch/examples/qualcomm/llama2/runner/runner.h>
 #include <executorch/extension/evalue_util/print_evalue.h>
+#include <executorch/extension/llm/runner/util.h>
 #include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <executorch/extension/runner_util/managed_tensor.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
+#include <executorch/runtime/platform/log.h>
 
 #include <ctime>
 #include <memory>
 #include <sstream>
 
-#include <executorch/examples/models/llama2/runner/util.h>
-#include <executorch/runtime/core/exec_aten/exec_aten.h>
-#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
-#include <executorch/runtime/platform/log.h>
-
 namespace torch {
 namespace executor {
 
diff --git a/sdk/CMakeLists.txt b/sdk/CMakeLists.txt
@@ -62,6 +62,10 @@ set(FLATCC_REFLECTION
     OFF
     CACHE BOOL ""
 )
+set(FLATCC_DEBUG_CLANG_SANITIZE
+    OFF
+    CACHE BOOL ""
+)
 set(_flatcc_source_dir ${CMAKE_CURRENT_SOURCE_DIR}/../third-party/flatcc)
 add_subdirectory(${_flatcc_source_dir} ${CMAKE_BINARY_DIR}/third-party/flatcc)