diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index 33f150413a3..0ef1ecdaf65 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -39,17 +39,6 @@ if(${ANDROID}) find_library(android_log log) endif() -set(qcir_schema_include_dir ${CMAKE_CURRENT_LIST_DIR}/aot/ir) -set(qcir_schema_output ${qcir_schema_include_dir}/qcir_generated.h) -add_custom_command( - OUTPUT qcir_schema_output - COMMAND flatc --cpp --cpp-std c++11 --scoped-enums -o - ${qcir_schema_include_dir} ${qcir_schema_include_dir}/qcir.fbs - DEPENDS flatc - COMMENT "Generating qualcomm ir schema headers" - VERBATIM -) - add_compile_options("-Wall" "-Werror" "-Wno-sign-compare") add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS) @@ -73,7 +62,6 @@ include_directories( ${_common_include_directories} ${QNN_SDK_ROOT}/include/QNN ${QNN_SDK_ROOT}/share/QNN/converter/jni - ${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include ${EXECUTORCH_SOURCE_DIR}/runtime/core/portable_type/c10 ) @@ -112,8 +100,6 @@ include_directories( # declare targets # add_library(executorch_backend INTERFACE) -add_library(qcir INTERFACE qcir_schema_output) -add_library(qcir_utils STATIC) add_library(qnn_backend STATIC) add_library(qnn_backend_cache STATIC) add_library(qnn_context STATIC) @@ -142,7 +128,6 @@ add_library(utils STATIC) # # declare dependency # -target_link_libraries(qcir_utils PRIVATE qcir) target_link_libraries(wrappers PRIVATE qnn_executorch_logging) target_link_libraries( qnn_implementation PRIVATE qnn_function_interface qnn_executorch_logging @@ -225,10 +210,6 @@ add_subdirectory( ${QNN_EXECUTORCH_ROOT_DIR}/aot/wrappers ${CMAKE_CURRENT_BINARY_DIR}/qnn_executorch/wrappers ) -add_subdirectory( - ${QNN_EXECUTORCH_ROOT_DIR}/aot/ir - ${CMAKE_CURRENT_BINARY_DIR}/qnn_executorch/ir -) install( TARGETS qnn_executorch_backend EXPORT ExecuTorchTargets diff --git a/backends/qualcomm/aot/ir/CMakeLists.txt b/backends/qualcomm/aot/ir/CMakeLists.txt deleted file mode 100755 index 48cb07c5dd2..00000000000 --- a/backends/qualcomm/aot/ir/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Qualcomm Innovation Center, Inc. -# All rights reserved -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# QCIR -target_sources( - qcir_utils PRIVATE ${CMAKE_CURRENT_LIST_DIR}/qcir_utils.h - ${CMAKE_CURRENT_LIST_DIR}/qcir_utils.cpp -) diff --git a/backends/qualcomm/aot/ir/TARGETS b/backends/qualcomm/aot/ir/TARGETS deleted file mode 100644 index 0a42614a385..00000000000 --- a/backends/qualcomm/aot/ir/TARGETS +++ /dev/null @@ -1,5 +0,0 @@ -load(":targets.bzl", "define_common_targets") - -oncall("executorch") - -define_common_targets() diff --git a/backends/qualcomm/aot/ir/qcir.fbs b/backends/qualcomm/aot/ir/qcir.fbs deleted file mode 100755 index 82e56c405cc..00000000000 --- a/backends/qualcomm/aot/ir/qcir.fbs +++ /dev/null @@ -1,119 +0,0 @@ -// -// Copyright (c) Qualcomm Innovation Center, Inc. -// All rights reserved. -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. -// - -namespace qcir; - -enum TensorType : byte { - WRITE = 0, - READ, - READWRITE, - NATIVE, - STATIC, - OPTIONAL, - UNDEFINED, -} - -enum DataType : byte { - INT8 = 0, - INT16, - INT32, - INT64, - UINT8, - UINT16, - UINT32, - UINT64, - FLOAT16, - FLOAT32, - FLOAT64, - SFIXED4, - SFIXED8, - SFIXED16, - SFIXED32, - UFIXED4, - UFIXED8, - UFIXED16, - UFIXED32, - BOOL, - STRING, - UNDEFINED, -} - -enum QuantizeDef : byte { - IMPL_GENERATED = 0, - DEFINED, - UNDEFINED, -} - -enum QuantizeType : byte { - SCALE_OFFSET = 0, - AXIS_SCALE_OFFSET, - BW_SCALE_OFFSET, - BW_AXIS_SCALE_OFFSET, - BLOCKWISE_EXPANSION, - UNDEFINED, -} - -enum BlockScaleStorageType: byte { - BITWIDTH_SCALE_STORAGE_8 = 0, - BITWIDTH_SCALE_STORAGE_16, - UNDEFINED, -} - -struct ScaleOffset { - scale: float; - offset: int; -} - -table QuantizeParam { - def: QuantizeDef; - type: QuantizeType; - bitwidth: uint; - axis: int; - // used by bitwidth quantization - scales: [float]; - offsets: [int]; - // used by general quantization - data: [ScaleOffset]; - // used by block quantization - num_blocks_per_axis: uint; - block_scale_storage_type: BlockScaleStorageType; - block_scale: [ubyte]; -} - -table Tensor { - name: string; - shape: [uint]; - dynamic_dims: [ubyte]; - type: TensorType; - dtype: DataType; - qparam: QuantizeParam; - size: uint; - offset: ulong; -} - -table Operator { - name: string; - package_name: string; - type_name: string; - // keep only tensor indexes - inputs: [uint]; - outputs: [uint]; - params: [uint]; -} - -table Graph { - name: string; - nodes: [Operator]; - tensors: [Tensor]; -} - -table Context { - graphs: [Graph]; -} - -root_type Context; diff --git a/backends/qualcomm/aot/ir/qcir_utils.cpp b/backends/qualcomm/aot/ir/qcir_utils.cpp deleted file mode 100755 index de9e349abe7..00000000000 --- a/backends/qualcomm/aot/ir/qcir_utils.cpp +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright (c) Qualcomm Innovation Center, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include - -#include - -namespace executorch { -namespace backends { -namespace qnn { - -qcir::TensorType ToTensorType(Qnn_TensorType_t type) { - static const std::unordered_map type_map{ - {QNN_TENSOR_TYPE_APP_WRITE, qcir::TensorType::WRITE}, - {QNN_TENSOR_TYPE_APP_READ, qcir::TensorType::READ}, - {QNN_TENSOR_TYPE_APP_READWRITE, qcir::TensorType::READWRITE}, - {QNN_TENSOR_TYPE_NATIVE, qcir::TensorType::NATIVE}, - {QNN_TENSOR_TYPE_STATIC, qcir::TensorType::STATIC}, - {QNN_TENSOR_TYPE_NULL, qcir::TensorType::OPTIONAL}, - {QNN_TENSOR_TYPE_UNDEFINED, qcir::TensorType::UNDEFINED}, - }; - return type_map.at(type); -} - -Qnn_TensorType_t ToTensorType(qcir::TensorType type) { - static const std::unordered_map type_map{ - {qcir::TensorType::WRITE, QNN_TENSOR_TYPE_APP_WRITE}, - {qcir::TensorType::READ, QNN_TENSOR_TYPE_APP_READ}, - {qcir::TensorType::READWRITE, QNN_TENSOR_TYPE_APP_READWRITE}, - {qcir::TensorType::NATIVE, QNN_TENSOR_TYPE_NATIVE}, - {qcir::TensorType::STATIC, QNN_TENSOR_TYPE_STATIC}, - {qcir::TensorType::OPTIONAL, QNN_TENSOR_TYPE_NULL}, - {qcir::TensorType::UNDEFINED, QNN_TENSOR_TYPE_UNDEFINED}, - }; - return type_map.at(type); -} - -// TODO: enable commented type by QNN version control -qcir::DataType ToDataType(Qnn_DataType_t type) { - static const std::unordered_map type_map{ - {QNN_DATATYPE_INT_8, qcir::DataType::INT8}, - {QNN_DATATYPE_INT_16, qcir::DataType::INT16}, - {QNN_DATATYPE_INT_32, qcir::DataType::INT32}, - {QNN_DATATYPE_INT_64, qcir::DataType::INT64}, - {QNN_DATATYPE_UINT_8, qcir::DataType::UINT8}, - {QNN_DATATYPE_UINT_16, qcir::DataType::UINT16}, - {QNN_DATATYPE_UINT_32, qcir::DataType::UINT32}, - {QNN_DATATYPE_UINT_64, qcir::DataType::UINT64}, - {QNN_DATATYPE_FLOAT_16, qcir::DataType::FLOAT16}, - {QNN_DATATYPE_FLOAT_32, qcir::DataType::FLOAT32}, - // {QNN_DATATYPE_FLOAT_64, qcir::DataType::FLOAT64}, - {QNN_DATATYPE_SFIXED_POINT_4, qcir::DataType::SFIXED4}, - {QNN_DATATYPE_SFIXED_POINT_8, qcir::DataType::SFIXED8}, - {QNN_DATATYPE_SFIXED_POINT_16, qcir::DataType::SFIXED16}, - {QNN_DATATYPE_SFIXED_POINT_32, qcir::DataType::SFIXED32}, - {QNN_DATATYPE_UFIXED_POINT_4, qcir::DataType::UFIXED4}, - {QNN_DATATYPE_UFIXED_POINT_8, qcir::DataType::UFIXED8}, - {QNN_DATATYPE_UFIXED_POINT_16, qcir::DataType::UFIXED16}, - {QNN_DATATYPE_UFIXED_POINT_32, qcir::DataType::UFIXED32}, - {QNN_DATATYPE_BOOL_8, qcir::DataType::BOOL}, - // {QNN_DATATYPE_STRING, qcir::DataType::STRING}, - {QNN_DATATYPE_UNDEFINED, qcir::DataType::UNDEFINED}, - }; - return type_map.at(type); -} - -// TODO: enable commented type by QNN version control -Qnn_DataType_t ToDataType(qcir::DataType type) { - static const std::unordered_map type_map{ - {qcir::DataType::INT8, QNN_DATATYPE_INT_8}, - {qcir::DataType::INT16, QNN_DATATYPE_INT_16}, - {qcir::DataType::INT32, QNN_DATATYPE_INT_32}, - {qcir::DataType::INT64, QNN_DATATYPE_INT_64}, - {qcir::DataType::UINT8, QNN_DATATYPE_UINT_8}, - {qcir::DataType::UINT16, QNN_DATATYPE_UINT_16}, - {qcir::DataType::UINT32, QNN_DATATYPE_UINT_32}, - {qcir::DataType::UINT64, QNN_DATATYPE_UINT_64}, - {qcir::DataType::FLOAT16, QNN_DATATYPE_FLOAT_16}, - {qcir::DataType::FLOAT32, QNN_DATATYPE_FLOAT_32}, - // {qcir::DataType::FLOAT64, QNN_DATATYPE_FLOAT_64}, - {qcir::DataType::SFIXED4, QNN_DATATYPE_SFIXED_POINT_4}, - {qcir::DataType::SFIXED8, QNN_DATATYPE_SFIXED_POINT_8}, - {qcir::DataType::SFIXED16, QNN_DATATYPE_SFIXED_POINT_16}, - {qcir::DataType::SFIXED32, QNN_DATATYPE_SFIXED_POINT_32}, - {qcir::DataType::UFIXED4, QNN_DATATYPE_UFIXED_POINT_4}, - {qcir::DataType::UFIXED8, QNN_DATATYPE_UFIXED_POINT_8}, - {qcir::DataType::UFIXED16, QNN_DATATYPE_UFIXED_POINT_16}, - {qcir::DataType::UFIXED32, QNN_DATATYPE_UFIXED_POINT_32}, - {qcir::DataType::BOOL, QNN_DATATYPE_BOOL_8}, - // {qcir::DataType::STRING, QNN_DATATYPE_STRING}, - {qcir::DataType::UNDEFINED, QNN_DATATYPE_UNDEFINED}, - }; - return type_map.at(type); -} - -flatbuffers::Offset ToQuantizeParam( - const Qnn_Tensor_t& tensor, - flatbuffers::FlatBufferBuilder* builder) { - static const std::unordered_map def_map{ - {QNN_DEFINITION_IMPL_GENERATED, qcir::QuantizeDef::IMPL_GENERATED}, - {QNN_DEFINITION_DEFINED, qcir::QuantizeDef::DEFINED}, - {QNN_DEFINITION_UNDEFINED, qcir::QuantizeDef::UNDEFINED}, - }; - static const std:: - unordered_map - type_map{ - {QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, - qcir::QuantizeType::SCALE_OFFSET}, - {QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET, - qcir::QuantizeType::AXIS_SCALE_OFFSET}, - {QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET, - qcir::QuantizeType::BW_SCALE_OFFSET}, - {QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET, - qcir::QuantizeType::BW_AXIS_SCALE_OFFSET}, - {QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION, - qcir::QuantizeType::BLOCKWISE_EXPANSION}, - {QNN_QUANTIZATION_ENCODING_UNDEFINED, - qcir::QuantizeType::UNDEFINED}, - }; - - int32_t axis = 0; - uint32_t bitwidth = 0, num_blocks_per_axis = 0; - auto param = QNN_TENSOR_VER_PTR(tensor)->quantizeParams; - auto quant_type = type_map.at(param.quantizationEncoding); - std::vector data; - std::vector block_scale; - std::vector scales; - std::vector offsets; - qcir::BlockScaleStorageType block_scale_storage_type = - qcir::BlockScaleStorageType::BITWIDTH_SCALE_STORAGE_8; - switch (quant_type) { - case qcir::QuantizeType::SCALE_OFFSET: { - data.emplace_back(qcir::ScaleOffset( - param.scaleOffsetEncoding.scale, param.scaleOffsetEncoding.offset)); - } break; - case qcir::QuantizeType::AXIS_SCALE_OFFSET: { - size_t len = param.axisScaleOffsetEncoding.numScaleOffsets; - axis = param.axisScaleOffsetEncoding.axis; - data.reserve(len); - for (uint i = 0; i < len; ++i) { - data.emplace_back(qcir::ScaleOffset( - param.axisScaleOffsetEncoding.scaleOffset[i].scale, - param.axisScaleOffsetEncoding.scaleOffset[i].offset)); - } - } break; - case qcir::QuantizeType::BW_SCALE_OFFSET: { - bitwidth = param.bwScaleOffsetEncoding.bitwidth; - scales.push_back(param.bwScaleOffsetEncoding.scale); - offsets.push_back(param.bwScaleOffsetEncoding.offset); - } break; - case qcir::QuantizeType::BW_AXIS_SCALE_OFFSET: { - bitwidth = param.bwAxisScaleOffsetEncoding.bitwidth; - axis = param.bwAxisScaleOffsetEncoding.axis; - size_t len = param.bwAxisScaleOffsetEncoding.numElements; - scales.reserve(len); - offsets.reserve(len); - for (size_t i = 0; i < len; ++i) { - scales.push_back(param.bwAxisScaleOffsetEncoding.scales[i]); - offsets.push_back(param.bwAxisScaleOffsetEncoding.offsets[i]); - } - } break; - case qcir::QuantizeType::BLOCKWISE_EXPANSION: { - bitwidth = param.blockwiseExpansion->blockScaleBitwidth; - axis = param.blockwiseExpansion->axis; - uint num_channels = QNN_TENSOR_VER_PTR(tensor)->dimensions[axis]; - for (uint i = 0; i < num_channels; ++i) { - data.emplace_back(qcir::ScaleOffset( - param.blockwiseExpansion->scaleOffsets[i].scale, - param.blockwiseExpansion->scaleOffsets[i].offset)); - } - num_blocks_per_axis = param.blockwiseExpansion->numBlocksPerAxis; - uint multiplier = 1; - if (param.blockwiseExpansion->blockScaleStorageType == - QNN_BLOCKWISE_EXPANSION_BITWIDTH_SCALE_STORAGE_16) { - multiplier = 2; - block_scale_storage_type = - qcir::BlockScaleStorageType::BITWIDTH_SCALE_STORAGE_16; - } - uint total_bytes = num_channels * num_blocks_per_axis * multiplier; - block_scale = std::vector( - param.blockwiseExpansion->blocksScale8, - param.blockwiseExpansion->blocksScale8 + total_bytes); - } break; - default: - // encodings are not required if lowering with floating point precision - break; - } - return CreateQuantizeParamDirect( - *builder, - def_map.at(param.encodingDefinition), - quant_type, - bitwidth, - axis, - &scales, - &offsets, - &data, - num_blocks_per_axis, - block_scale_storage_type, - &block_scale); -} - -Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor) { - static const std::unordered_map def_map{ - {qcir::QuantizeDef::IMPL_GENERATED, QNN_DEFINITION_IMPL_GENERATED}, - {qcir::QuantizeDef::DEFINED, QNN_DEFINITION_DEFINED}, - {qcir::QuantizeDef::UNDEFINED, QNN_DEFINITION_UNDEFINED}, - }; - static const std:: - unordered_map - type_map{ - {qcir::QuantizeType::SCALE_OFFSET, - QNN_QUANTIZATION_ENCODING_SCALE_OFFSET}, - {qcir::QuantizeType::AXIS_SCALE_OFFSET, - QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET}, - {qcir::QuantizeType::BW_SCALE_OFFSET, - QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET}, - {qcir::QuantizeType::BW_AXIS_SCALE_OFFSET, - QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET}, - {qcir::QuantizeType::BLOCKWISE_EXPANSION, - QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION}, - {qcir::QuantizeType::UNDEFINED, - QNN_QUANTIZATION_ENCODING_UNDEFINED}, - }; - // Qnn_BlockwiseExpansion_t is a pointer type in Qnn_QuantizeParams_t - // need a bookkeeper for guarding life cycle - static std::vector> block_param; - - Qnn_QuantizeParams_t p = QNN_QUANTIZE_PARAMS_INIT; - auto param = tensor->qparam(); - p.encodingDefinition = def_map.at(param->def()); - p.quantizationEncoding = type_map.at(param->type()); - switch (p.quantizationEncoding) { - case QNN_QUANTIZATION_ENCODING_SCALE_OFFSET: { - p.scaleOffsetEncoding.scale = param->data()->Get(0)->scale(); - p.scaleOffsetEncoding.offset = param->data()->Get(0)->offset(); - } break; - case QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET: { - p.axisScaleOffsetEncoding.axis = param->axis(); - p.axisScaleOffsetEncoding.numScaleOffsets = param->data()->size(); - p.axisScaleOffsetEncoding.scaleOffset = - reinterpret_cast( - const_cast(param->data()->Data())); - } break; - case QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET: { - p.bwAxisScaleOffsetEncoding.bitwidth = param->bitwidth(); - p.bwScaleOffsetEncoding.scale = param->scales()->Get(0); - p.bwScaleOffsetEncoding.offset = param->offsets()->Get(0); - } break; - case QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET: { - p.bwAxisScaleOffsetEncoding.bitwidth = param->bitwidth(); - p.bwAxisScaleOffsetEncoding.axis = param->axis(); - p.bwAxisScaleOffsetEncoding.numElements = param->scales()->size(); - p.bwAxisScaleOffsetEncoding.scales = - const_cast(param->scales()->data()); - p.bwAxisScaleOffsetEncoding.offsets = - const_cast(param->offsets()->data()); - } break; - case QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION: { - block_param.emplace_back(std::make_unique()); - p.blockwiseExpansion = block_param.back().get(); - p.blockwiseExpansion->axis = param->axis(); - p.blockwiseExpansion->scaleOffsets = reinterpret_cast( - const_cast(param->data()->Data())); - p.blockwiseExpansion->numBlocksPerAxis = param->num_blocks_per_axis(); - switch (param->block_scale_storage_type()) { - case qcir::BlockScaleStorageType::BITWIDTH_SCALE_STORAGE_8: - p.blockwiseExpansion->blockScaleStorageType = - QNN_BLOCKWISE_EXPANSION_BITWIDTH_SCALE_STORAGE_8; - break; - case qcir::BlockScaleStorageType::BITWIDTH_SCALE_STORAGE_16: - p.blockwiseExpansion->blockScaleStorageType = - QNN_BLOCKWISE_EXPANSION_BITWIDTH_SCALE_STORAGE_16; - break; - default: - p.blockwiseExpansion->blockScaleStorageType = - QNN_BLOCKWISE_EXPANSION_BITWIDTH_SCALE_STORAGE_UNDEFINED; - break; - } - p.blockwiseExpansion->blocksScale8 = - const_cast(param->block_scale()->Data()); - } break; - default: - // encodings are not required if lowering with floating point precision - break; - } - return p; -} - -flatbuffers::Offset ToTensor( - const Qnn_Tensor_t& tensor, - const uint64_t data_offset, - flatbuffers::FlatBufferBuilder* builder) { - std::vector shape( - QNN_TENSOR_VER_PTR(tensor)->dimensions, - QNN_TENSOR_VER_PTR(tensor)->dimensions + - QNN_TENSOR_VER_PTR(tensor)->rank); - std::vector dynamic_dims( - QNN_TENSOR_VER_PTR(tensor)->isDynamicDimensions, - QNN_TENSOR_VER_PTR(tensor)->isDynamicDimensions + - QNN_TENSOR_VER_PTR(tensor)->rank); - - return qcir::CreateTensorDirect( - *builder, - QNN_TENSOR_VER_PTR(tensor)->name, - &shape, - &dynamic_dims, - ToTensorType(QNN_TENSOR_VER_PTR(tensor)->type), - ToDataType(QNN_TENSOR_VER_PTR(tensor)->dataType), - ToQuantizeParam(tensor, builder), - QNN_TENSOR_VER_PTR(tensor)->clientBuf.dataSize, - data_offset); -} - -Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr) { - auto is_io_tensor = [](Qnn_TensorType_t type) { - return type < QNN_TENSOR_TYPE_STATIC; - }; - - Qnn_Tensor_t t({.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT}); - QNN_TENSOR_VER_PTR(t)->name = tensor->name()->c_str(); - QNN_TENSOR_VER_PTR(t)->type = ToTensorType(tensor->type()); - QNN_TENSOR_VER_PTR(t)->dataType = ToDataType(tensor->dtype()); - QNN_TENSOR_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor); - QNN_TENSOR_VER_PTR(t)->rank = tensor->shape()->size(); - QNN_TENSOR_VER_PTR(t)->dimensions = - const_cast(tensor->shape()->data()); - QNN_TENSOR_VER_PTR(t)->isDynamicDimensions = - const_cast(tensor->dynamic_dims()->data()); - QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize = tensor->size(); - QNN_TENSOR_VER_PTR(t)->clientBuf.data = - is_io_tensor(QNN_TENSOR_VER_PTR(t)->type) - ? nullptr - : static_cast(const_cast(data_ptr)); - return t; -} - -} // namespace qnn -} // namespace backends -} // namespace executorch diff --git a/backends/qualcomm/aot/ir/qcir_utils.h b/backends/qualcomm/aot/ir/qcir_utils.h deleted file mode 100755 index 085f09bf145..00000000000 --- a/backends/qualcomm/aot/ir/qcir_utils.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) Qualcomm Innovation Center, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include "QnnTypes.h" - -namespace executorch { -namespace backends { -namespace qnn { - -typedef flatbuffers::Vector<::flatbuffers::Offset>::return_type - tensor_type; -typedef flatbuffers::Vector< - ::flatbuffers::Offset>::return_type qparam_type; - -qcir::TensorType ToTensorType(Qnn_TensorType_t type); -Qnn_TensorType_t ToTensorType(qcir::TensorType type); -qcir::DataType ToDataType(Qnn_DataType_t type); -Qnn_DataType_t ToDataType(qcir::DataType type); - -flatbuffers::Offset ToQuantizeParam( - const Qnn_Tensor_t& tensor, - flatbuffers::FlatBufferBuilder* builder); -Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor); - -flatbuffers::Offset ToTensor( - const Qnn_Tensor_t& tensor, - const uint64_t data_offset, - flatbuffers::FlatBufferBuilder* builder); -Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr); - -} // namespace qnn -} // namespace backends -} // namespace executorch diff --git a/backends/qualcomm/aot/ir/targets.bzl b/backends/qualcomm/aot/ir/targets.bzl deleted file mode 100644 index 2405af35d6c..00000000000 --- a/backends/qualcomm/aot/ir/targets.bzl +++ /dev/null @@ -1,68 +0,0 @@ -load( - "@fbsource//tools/build_defs:default_platform_defs.bzl", - "ANDROID", -) -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") -load("@fbsource//xplat/executorch/backends/qualcomm:targets.bzl", "generate_schema_header") -load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_version") - -QCIR_NAME = "qcir" -INPUT_QCIR = QCIR_NAME + ".fbs" -OUTPUT_QCIR_HEADER = QCIR_NAME + "_generated.h" -QCIR_GEN_RULE_NAME = "qcir_generated" - -def define_common_targets(): - """Defines targets that should be shared between fbcode and xplat. - The directory containing this targets.bzl file should also contain both - TARGETS and BUCK files that call this function. - """ - - generate_schema_header( - QCIR_GEN_RULE_NAME, - [INPUT_QCIR], - [OUTPUT_QCIR_HEADER], - OUTPUT_QCIR_HEADER, - ) - - # Header-only library target with the generate executorch program schema header. - runtime.cxx_library( - name = "qcir_schema", - srcs = [], - exported_headers = { - OUTPUT_QCIR_HEADER: ":{}[{}]".format(QCIR_GEN_RULE_NAME, OUTPUT_QCIR_HEADER), - }, - visibility = [ - # Lock this down as tightly as possible to ensure that flatbuffers - # are an implementation detail. Ideally this list would only include - # //executorch/runtime/executor/... - "//executorch/backends/qualcomm/...", - "//executorch/backends/qualcomm/aot/ir/...", - ], - exported_external_deps = ["flatbuffers-api"], - define_static_target = True, - platforms = [ANDROID], - ) - - - runtime.cxx_library( - name = "qcir_utils", - srcs = [ - "qcir_utils.cpp", - ], - exported_headers = [ - "qcir_utils.h", - ], - define_static_target = True, - platforms = [ANDROID], - visibility = ["@EXECUTORCH_CLIENTS"], - deps = [ - "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()), - "fbsource//third-party/qualcomm/qnn/qnn-{0}:app_sources".format(get_qnn_library_version()), - "//executorch/runtime/backend:interface", - "//executorch/runtime/core:core", - "//executorch/backends/qualcomm/aot/wrappers:wrappers", - ], - exported_deps = [ - ":qcir_schema", - ], - ) diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h index 409ec1a4294..c8044e5db0e 100644 --- a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h +++ b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h @@ -132,16 +132,6 @@ class PyQnnManager { return qnn_manager_->GetSpillFillBufferSize(); } - QnnExecuTorchContextBinary MakeQcirCustomBinaryInfo( - const QnnExecuTorchContextBinary& ctx_bin, - const std::vector& tensor_data) { - custom_qcir_protocol_buffer_ = - QnnQcirCustomProtocol(ctx_bin.nbytes, tensor_data.size()); - custom_qcir_protocol_buffer_.BuildQcirCustomBuffer(ctx_bin, tensor_data); - auto [ptr, size] = custom_qcir_protocol_buffer_.GetCustomProtocolBuffer(); - return {ptr, size}; - } - py::array_t MakeBinaryInfo(const py::bytes& ctx_bin) { py::buffer_info info(py::buffer(ctx_bin).request()); QnnExecuTorchContextBinary binary( @@ -171,22 +161,10 @@ class PyQnnManager { buf_size = ctx_size; buf_ptr = ctx_bin; } else { - // check if it's a qcir flatbuffers, return fbs if matched - auto - [status, - qcir_fbs_size, - qcir_tensor_size, - qcir_fbs_ptr, - qcir_tensor_ptr] = - QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(info.ptr); - if (status == Error::Ok) { - buf_size = qcir_fbs_size; - buf_ptr = qcir_fbs_ptr; - } else { - // the format should be DLC, return nothing here - return py::array_t(0); - } + // the format should be DLC, return nothing here + return py::array_t(0); } + auto result = py::array_t(buf_size); auto result_buffer = result.request(); std::memcpy(result_buffer.ptr, buf_ptr, buf_size); @@ -199,7 +177,6 @@ class PyQnnManager { const py::bytes qnn_executorch_option_ptr_; QnnExecuTorchContextBinary qnn_executorch_context_binary_; std::shared_ptr qnn_manager_; - QnnQcirCustomProtocol custom_qcir_protocol_buffer_; QnnContextCustomProtocol custom_context_custom_buffer_; flatbuffers::FlatBufferBuilder builder_; }; diff --git a/backends/qualcomm/aot/python/targets.bzl b/backends/qualcomm/aot/python/targets.bzl index da27997808b..74fbd1da511 100644 --- a/backends/qualcomm/aot/python/targets.bzl +++ b/backends/qualcomm/aot/python/targets.bzl @@ -31,7 +31,6 @@ def define_common_targets(): "//executorch/backends/qualcomm/aot/wrappers:wrappers", "//executorch/backends/qualcomm/runtime:logging", "//executorch/backends/qualcomm:schema", - "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()), @@ -65,7 +64,6 @@ def define_common_targets(): "//executorch/backends/qualcomm/aot/wrappers:wrappers", "//executorch/backends/qualcomm/runtime:logging", "//executorch/backends/qualcomm:schema", - "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()), @@ -94,7 +92,6 @@ def define_common_targets(): "//executorch/backends/qualcomm/aot/wrappers:wrappers", "//executorch/backends/qualcomm/runtime:logging", "//executorch/backends/qualcomm:schema", - "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/runtime:runtime", "fbsource//third-party/pybind11:pybind11", "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()), diff --git a/backends/qualcomm/runtime/QnnExecuTorch.h b/backends/qualcomm/runtime/QnnExecuTorch.h index 2ca0cd61cd5..83776937411 100644 --- a/backends/qualcomm/runtime/QnnExecuTorch.h +++ b/backends/qualcomm/runtime/QnnExecuTorch.h @@ -22,8 +22,7 @@ extern "C" { // This could be: // 1. qnn_context_binary -// 2. QnnQcirCustomProtocol -// 3. QnnContextCustomProtocol +// 2. QnnContextCustomProtocol // To check if it is custom protocol, users can deserialize the binary using // QnnCustomProtocol and check the status typedef struct { diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp index 01bf13603d6..751e5d18119 100644 --- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp +++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp @@ -48,8 +48,7 @@ Result QnnExecuTorchBackend::init( qnn_context_blob.buffer = ctx_bin; } else { // This buffer will be verified again in QnnBackendCache. - QNN_EXECUTORCH_LOG_INFO( - "Deserializing processed data using QnnQcirCustomProtocol"); + QNN_EXECUTORCH_LOG_INFO("Deserializing processed data using Dlc"); qnn_context_blob.buffer = const_cast(processed->data()); qnn_context_blob.nbytes = processed->size(); } diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp index 4387d61ab7c..3dd1738d33b 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp @@ -113,7 +113,6 @@ Error QnnBackendCache::Configure(const std::vector& graph_names) { // DO DESERIALIZE state_ = DESERIALIZE; QNN_EXECUTORCH_LOG_INFO("Caching: Caching is in RESTORE MODE."); - auto [status, _, context_size, context_ptr] = QnnContextCustomProtocol().DeserializeContextCustomBuffer( qnn_context_blob_.buffer); diff --git a/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp b/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp index 12de1b3e705..b01d7ab6d80 100644 --- a/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp +++ b/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp @@ -12,87 +12,6 @@ namespace executorch { namespace backends { namespace qnn { -// we still need this for on-device op validation of other backends -void QnnQcirCustomProtocol::BuildQcirCustomBuffer( - const QnnExecuTorchContextBinary& qcir_binary, - const std::vector& tensor_data) { - if (qnn_custom_buffer_.size() == 0) { - uint8_t magic_number_proto_size = sizeof(magic_number_); - uint8_t qcir_fbs_proto_size = sizeof(qcir_fbs_size_); - uint8_t tensor_proto_size = sizeof(tensor_size_); - - uint64_t buffer_size = magic_number_proto_size + qcir_fbs_proto_size + - tensor_proto_size + qcir_fbs_size_ + tensor_size_; - qnn_custom_buffer_.resize(buffer_size, 0); - - size_t pos = 0; - // magic number itself - std::memcpy( - qnn_custom_buffer_.data(), &magic_number_, magic_number_proto_size); - pos += magic_number_proto_size; - - // size of qcir_fbs, should be 4 bytes - std::memcpy( - qnn_custom_buffer_.data() + pos, &qcir_fbs_size_, qcir_fbs_proto_size); - pos += qcir_fbs_proto_size; - - // size of tensor, should be 8 bytes - std::memcpy( - qnn_custom_buffer_.data() + pos, &tensor_size_, tensor_proto_size); - pos += tensor_proto_size; - - // qcir.fbs buffer - uint8_t* qcir_ptr = static_cast(qcir_binary.buffer); - - std::memcpy(qnn_custom_buffer_.data() + pos, qcir_ptr, qcir_fbs_size_); - pos += qcir_fbs_size_; - - // tensor data - std::memcpy( - qnn_custom_buffer_.data() + pos, tensor_data.data(), tensor_size_); - } -} - -std::tuple -QnnQcirCustomProtocol::DeserializeQcirCustomBuffer(void* processed_data) { - Error status = Error::Ok; - uint8_t* ptr = static_cast(processed_data); - size_t magic_number_proto_size = sizeof(magic_number_); - uint8_t qcir_fbs_proto_size = sizeof(qcir_fbs_size_); - uint8_t tensor_proto_size = sizeof(tensor_size_); - - uint32_t magic_number; - std::memcpy(&magic_number, ptr, magic_number_proto_size); - ptr += magic_number_proto_size; - - if (magic_number != magic_number_) { - QNN_EXECUTORCH_LOG_INFO( - "QnnQcirCustomProtocol expected magic number: 0x%x but get: 0x%x", - magic_number_, - magic_number); - status = Error::Internal; - } - - // Retrieve size of qcir.fbs - uint32_t qcir_fbs_size; - std::memcpy(&qcir_fbs_size, ptr, qcir_fbs_proto_size); - ptr += qcir_fbs_proto_size; - - // Retrieve size of tensor - uint64_t tensor_size; - std::memcpy(&tensor_size, ptr, tensor_proto_size); - ptr += tensor_proto_size; - - // Retrieve qcir.fbs pointer - void* qcir_fbs_ptr = static_cast(ptr); - ptr += qcir_fbs_size; - - // Retrieve tensor - void* tensor_ptr = static_cast(ptr); - - return {status, qcir_fbs_size, tensor_size, qcir_fbs_ptr, tensor_ptr}; -} - void QnnContextCustomProtocol::BuildContextCustomBuffer() { if (qnn_custom_buffer_.size() == 0) { signature_ = diff --git a/backends/qualcomm/runtime/backends/QnnCustomProtocol.h b/backends/qualcomm/runtime/backends/QnnCustomProtocol.h index 6ea556899f5..3cc6a6e25dc 100644 --- a/backends/qualcomm/runtime/backends/QnnCustomProtocol.h +++ b/backends/qualcomm/runtime/backends/QnnCustomProtocol.h @@ -24,13 +24,8 @@ namespace qnn { using executorch::runtime::Error; -// We have 2 kinds of protocol here: custom_qcir_protocol, -// custom_context_protocol. We need this class due to limitation of 32bits -// flatbuffer. Since larger models can exceed the maximum size for 32bits -// flatbuffer, we need to define our own protocol and store some information -// outside of the flatbuffer. The magic number helps determine if we are getting -// the correct custom protocol buffer and differentiate custom_qcir_protocol -// from custom_context_protocol. +// Required for multi-graph support to retrieve qnn manager handle via unique +// signature. class QnnCustomProtocol { public: QnnCustomProtocol() {} @@ -47,48 +42,6 @@ class QnnCustomProtocol { std::vector qnn_custom_buffer_; }; -// For custom_qcir_protocol, we expect the following format: -// -// ------------------------------ -// | qcir magic number (4 bytes)| -// ------------------------------ -// | qcir.fbs size (4 bytes) | -// ------------------------------ -// | tensor size (8 bytes) | -// ------------------------------ -// | qcir.fbs (flatbuffer) | -// ------------------------------ -// | tensor.data | -// ------------------------------ -class QnnQcirCustomProtocol : public QnnCustomProtocol { - public: - // Constructor for Serialize - QnnQcirCustomProtocol(uint32_t qcir_fbs_size, uint64_t tensor_size) - : QnnCustomProtocol(), - qcir_fbs_size_(qcir_fbs_size), - tensor_size_(tensor_size) {} - - // Constructor for Deserialize - QnnQcirCustomProtocol() : QnnCustomProtocol() {} - - void BuildQcirCustomBuffer( - const QnnExecuTorchContextBinary& qcir_binary, - const std::vector& tensor_data); - // Return a tuple with 5 elements: - // 1) Error: Status of whether deserializing is successful. - // 2) uint32_t: Size of qcir fbs - // 3) uint64_t: Size of tensor - // 4) void*: Pointer pointing to the start of qcir fbs - // 5) void*: Pointer pointing to the start of tensor - std::tuple - DeserializeQcirCustomBuffer(void* processed_data); - - private: - static constexpr uint32_t magic_number_ = 0x1234ABCD; - uint32_t qcir_fbs_size_{0}; - uint64_t tensor_size_{0}; -}; - // For custom context binary protocol, we expect the following format: // // --------------------------------- diff --git a/backends/qualcomm/runtime/targets.bzl b/backends/qualcomm/runtime/targets.bzl index 1bd82f8f913..a74b16ad4a2 100644 --- a/backends/qualcomm/runtime/targets.bzl +++ b/backends/qualcomm/runtime/targets.bzl @@ -73,7 +73,6 @@ def define_common_targets(): "fbsource//third-party/qualcomm/qnn/qnn-{0}:app_sources".format(get_qnn_library_version()), ":logging", "//executorch/backends/qualcomm:schema", - "//executorch/backends/qualcomm/aot/ir:qcir_utils", "//executorch/backends/qualcomm/aot/wrappers:wrappers", "//executorch/runtime/backend:interface", "//executorch/runtime/core:core", diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index fd2d10e2b93..051e2c78184 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -144,30 +144,6 @@ def validate_context_binary(ctx_bin: bytes): assert os.path.isfile(f"{tmp_dir}/ctx.json"), print(result.stderr) -def validate_qcir(qcir: bytes): - with tempfile.TemporaryDirectory() as tmp_dir: - with open(f"{tmp_dir}/qcir.bin", "wb") as binary_file: - binary_file.write(qcir) - - cmds = [ - "flatc", - "-o", - tmp_dir, - "--raw-binary", - "-t", - f"{os.path.dirname(__file__)}/../aot/ir/qcir.fbs", - "--", - f"{tmp_dir}/qcir.bin", - ] - result = subprocess.run( - " ".join(cmds), - shell=True, - executable="/bin/bash", - capture_output=True, - ) - assert os.path.isfile(f"{tmp_dir}/qcir.json"), print(result.stderr) - - class TestQNN(unittest.TestCase): rtol: float = 0 atol: float = 0