From c18625a820b98275f673db6e57a96f7952529d7d Mon Sep 17 00:00:00 2001
From: minfhong
Date: Fri, 27 Jun 2025 13:59:10 +0800
Subject: [PATCH] [QNN EP] Support NonZero.

- Implement NonZero op builder and register the QDQ selector.
- Implement ShapeNonZero QNN preprocess to fix the shape.

Test: UTs.
---
 .../selectors_actions/shared/utils.cc         |   3 +-
 .../qnn/builder/op_builder_factory.cc         |   4 +
 .../qnn/builder/op_builder_factory.h          |   2 +
 .../qnn/builder/opbuilder/base_op_builder.h   |   1 +
 .../builder/opbuilder/nonzero_op_builder.cc   | 117 +++++++++++++
 .../execution_providers/qnn/preprocess.py     |   6 +-
 .../execution_providers/qnn/shape_nonzero.py  |  85 +++++++++
 .../test/providers/qnn/nonzero_op_test.cc     | 161 ++++++++++++++++++
 .../quantization/test_qnn_preprocess_model.py |  51 ++++++
 9 files changed, 428 insertions(+), 2 deletions(-)
 create mode 100644 onnxruntime/core/providers/qnn/builder/opbuilder/nonzero_op_builder.cc
 create mode 100644 onnxruntime/python/tools/quantization/execution_providers/qnn/shape_nonzero.py
 create mode 100644 onnxruntime/test/providers/qnn/nonzero_op_test.cc

diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
index 0116dec5170f0..fd3d43f21c8f2 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
@@ -48,7 +48,8 @@ static const OpVersionsAndSelector::OpVersionsMap GetMiscOpVersionsMap() {
 // These produce int64 indices output, which can't be quantized, so there's no downstream Q node.
 static const OpVersionsAndSelector::OpVersionsMap GetDropDQOpVersionsMap() {
   return {{"ArgMax", {}},
-          {"ArgMin", {}}};
+          {"ArgMin", {}},
+          {"NonZero", {}}};
 }
 
 static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
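Illustration (not part of the patch): the DropDQ selector matches node groups where a DequantizeLinear feeds an op whose output cannot be quantized, so there is no trailing QuantizeLinear. A minimal sketch of such a group for NonZero; all tensor names and quantization values below are chosen arbitrarily.

    import onnx
    from onnx import TensorProto, helper

    # DQ -> NonZero with no downstream Q node: the int64 indices output is not quantizable.
    inp = helper.make_tensor_value_info("x_q", TensorProto.UINT8, [1, 3, 4, 4])
    out = helper.make_tensor_value_info("indices", TensorProto.INT64, None)
    scale = helper.make_tensor("x_scale", TensorProto.FLOAT, [], [0.078])
    zero_point = helper.make_tensor("x_zp", TensorProto.UINT8, [], [128])

    dq = helper.make_node("DequantizeLinear", ["x_q", "x_scale", "x_zp"], ["x_f32"])
    nonzero = helper.make_node("NonZero", ["x_f32"], ["indices"])

    graph = helper.make_graph([dq, nonzero], "dq_nonzero", [inp], [out],
                              initializer=[scale, zero_point])
    onnx.checker.check_model(helper.make_model(graph))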
diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
index cdc7c401ba25e..bc701050c5526 100644
--- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
+++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
@@ -223,6 +223,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
   {
     CreateInverseOpBuilder("Inverse", *this);
   }
+
+  {
+    CreateNonZeroOpBuilder("NonZero", *this);
+  }
 }
 
 const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type) {

diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h
index 0c12474c784eb..4fc2d09332530 100644
--- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h
+++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h
@@ -125,5 +125,7 @@ void CreateSTFTOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_
 
 void CreateInverseOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 
+void CreateNonZeroOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
+
 }  // namespace qnn
 }  // namespace onnxruntime

diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
index 83c226115aa84..de6f44f55917a 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
@@ -156,6 +156,7 @@ class BaseOpBuilder : public IOpBuilder {
         {"Max", QNN_OP_ELEMENT_WISE_MAXIMUM},
         {"Min", QNN_OP_ELEMENT_WISE_MINIMUM},
         {"Neg", QNN_OP_ELEMENT_WISE_NEG},
+        {"NonZero", QNN_OP_NON_ZERO},
         {"Not", QNN_OP_ELEMENT_WISE_NOT},
         {"Or", QNN_OP_ELEMENT_WISE_OR},
         {"Pow", QNN_OP_ELEMENT_WISE_POWER},

diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/nonzero_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/nonzero_op_builder.cc
new file mode 100644
index 0000000000000..b18de47f79b61
--- /dev/null
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/nonzero_op_builder.cc
@@ -0,0 +1,117 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <memory>
+#include <utility>
+
+#include "core/providers/qnn/builder/opbuilder/base_op_builder.h"
+#include "core/providers/qnn/builder/op_builder_factory.h"
+#include "core/providers/qnn/builder/qnn_utils.h"
+
+namespace onnxruntime {
+namespace qnn {
+
+class NonZeroOpBuilder : public BaseOpBuilder {
+ public:
+  NonZeroOpBuilder() : BaseOpBuilder("NonZeroOpBuilder") {}
+
+ protected:
+  Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                     const NodeUnit& node_unit,
+                                     std::vector<std::string>&& input_names,
+                                     const logging::Logger& logger,
+                                     bool do_op_validation) const override ORT_MUST_USE_RESULT;
+};
+
+Status NonZeroOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                                     const NodeUnit& node_unit,
+                                                     std::vector<std::string>&& input_names,
+                                                     const logging::Logger& logger,
+                                                     bool do_op_validation) const {
+  // Handle a corner case explicitly: an empty input can pass backend validation but is in fact not executable.
+  const std::vector<uint32_t>& input_shape = qnn_model_wrapper.GetQnnTensorWrapper(input_names[0]).GetTensorDims();
+  for (const uint32_t& dim : input_shape) {
+    ORT_RETURN_IF(dim == 0, "QNN does not support NonZero with empty input.");
+  }
+
+  const auto& output = node_unit.Outputs()[0];
+  const std::string& output_name = output.node_arg.Name();
+
+  TensorInfo output_info = {};
+  Status status = qnn_model_wrapper.GetTensorInfo(output, output_info);
+  if (!status.IsOK()) {
+    LOGS(logger, ERROR) << "Encountered NonZero node " << node_unit.Name()
+                        << ", which has a dynamically shaped output tensor. "
+                        << "QNN supports NonZero by allocating the maximum possible size (i.e., all elements != 0) "
+                        << "and filling only the detected nonzero elements in the output tensor. "
+                        << "The model must be preprocessed to eliminate the dynamic shapes before QNN can support it.";
+    return status;
+  }
+
+  // The (shape-fixed) ONNX NonZero output has shape [input_rank, #input_elements].
+  uint32_t rank = output_info.shape[0];
+  uint32_t num_elements = output_info.shape[1];
+
+  // QNN NonZero has shape [#input_elements, input_rank], and thus an extra Transpose must be inserted afterwards.
+  const std::string transpose_input_name = utils::GetUniqueName(output_name, "_transpose");
+  const std::vector<uint32_t> transpose_input_shape{num_elements, rank};
+  QnnTensorWrapper output_tensorwrapper(transpose_input_name,
+                                        QNN_TENSOR_TYPE_NATIVE,
+                                        output_info.qnn_data_type,
+                                        output_info.quant_param.Copy(),
+                                        std::vector<uint32_t>(transpose_input_shape));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(utils::GetUniqueName(node_unit),
+                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                    GetQnnOpType(node_unit.OpType()),
+                                                    std::move(input_names),
+                                                    {transpose_input_name},
+                                                    {},
+                                                    do_op_validation),
+                    "Failed to add NonZero node.");
+
+  // NonZero outputs indices with INT64 dtype. If the output is also a graph output, add a Cast node to
+  // cast the dtype back to INT64, since wrapper construction implicitly changes the dtype to INT32.
+  const bool is_cast_required = output_info.qnn_data_type == QNN_DATATYPE_INT_64 &&
+                                qnn_model_wrapper.IsGraphOutput(output_name);
+  const std::string transpose_output_name = is_cast_required ? utils::GetUniqueName(output_name, "_cast") : output_name;
+
+  std::vector<uint32_t> transpose_perm{1, 0};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
+                                                         transpose_input_name,
+                                                         transpose_output_name,
+                                                         transpose_input_shape,
+                                                         transpose_perm,
+                                                         output_info.shape,
+                                                         output_info.qnn_data_type,
+                                                         output_info.quant_param,
+                                                         do_op_validation,
+                                                         false,
+                                                         false));
+
+  if (is_cast_required) {
+    QnnTensorWrapper cast_output_tensorwrapper(output_name,
+                                               QNN_TENSOR_TYPE_APP_READ,
+                                               output_info.qnn_data_type,
+                                               output_info.quant_param.Copy(),
+                                               std::vector<uint32_t>(output_info.shape));
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(cast_output_tensorwrapper)),
+                      "Failed to add tensor.");
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(utils::GetUniqueName(node_unit, QNN_OP_CAST),
+                                                      QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                      QNN_OP_CAST,
+                                                      {transpose_output_name},
+                                                      {output_name},
+                                                      {},
+                                                      do_op_validation),
+                      "Failed to add Cast node.");
+  }
+
+  return Status::OK();
+}
+
+void CreateNonZeroOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
+  op_registrations.AddOpBuilder(op_type, std::make_unique<NonZeroOpBuilder>());
+}
+
+}  // namespace qnn
+}  // namespace onnxruntime
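Illustration (not part of the patch): a numpy sketch of the layout mismatch the op builder resolves. ONNX NonZero returns coordinates shaped [rank, n], while QNN NonZero writes [max_n, rank], where max_n is the total input element count and the unused tail is padding; hence the inserted Transpose. The padding value below is assumed for illustration only.

    import numpy as np

    x = np.array([[0.0, 1.5], [2.0, 0.0]])
    onnx_out = np.stack(np.nonzero(x))  # ONNX layout: [rank, #nonzero] == [2, 2]

    max_n = x.size                      # QNN allocates the maximum possible count
    coords = np.argwhere(x)             # QNN layout: [#nonzero, rank]
    qnn_out = np.zeros((max_n, x.ndim), dtype=np.int64)  # padding value assumed
    qnn_out[: coords.shape[0]] = coords

    # The Transpose added by the builder restores the ONNX layout.
    assert np.array_equal(qnn_out.T[:, : coords.shape[0]], onnx_out)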
diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/preprocess.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/preprocess.py
index a12aca47f5b65..584463b3bc44c 100644
--- a/onnxruntime/python/tools/quantization/execution_providers/qnn/preprocess.py
+++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/preprocess.py
@@ -17,6 +17,7 @@
 from ...onnx_model import ONNXModel
 from .fusion_lpnorm import FusionLpNormalization
 from .fusion_spacetodepth import FusionSpaceToDepth
+from .shape_nonzero import ShapeNonZero
 
 
 def qnn_preprocess_model(
@@ -108,6 +109,9 @@ def qnn_preprocess_model(
     if exclude_initializer_from_input:
         modified |= remove_initializer_from_input(onnx_model.model)
 
+    # Fix the dynamic output shape of NonZero nodes.
+    modified |= ShapeNonZero(onnx_model).apply()
+
     # Fuse Erf sequence into a single Gelu
     fusion_gelu = FusionGelu(onnx_model)
     if fusion_gelu.apply():
@@ -166,7 +170,7 @@ def qnn_preprocess_model(
     if modified:
         onnx_model.topological_sort()
         onnx.save_model(
-            model,
+            onnx_model.model,
             model_output,
             save_as_external_data=save_as_external_data,
             all_tensors_to_one_file=all_tensors_to_one_file,

diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/shape_nonzero.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/shape_nonzero.py
new file mode 100644
index 0000000000000..8733da618f361
--- /dev/null
+++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/shape_nonzero.py
@@ -0,0 +1,85 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+"""Define NonZero shape inference."""
+
+import logging
+
+import numpy as np
+import onnx
+
+from ... import fusions, onnx_model
+
+
+class ShapeNonZero(fusions.Fusion):
+    """Shape inference for NonZero.
+
+    A NonZero node produces a dynamically shaped output tensor, which leaves the tensor shapes of the following
+    nodes undetermined as well. QNN expects NonZero to have its output shape set to the maximum possible size
+    (i.e., the total number of input elements) and lets the runtime handle the dynamic shape later.
+    """
+
+    def __init__(self, model: onnx_model.ONNXModel):
+        """Initialize.
+
+        Args:
+            model: An onnx_model.ONNXModel instance.
+        """
+        super().__init__(model, "", "NonZero")
+
+    def fuse(
+        self,
+        node: onnx.NodeProto,
+        input_name_to_nodes: dict[str, list[onnx.NodeProto]],
+        output_name_to_node: dict[str, onnx.NodeProto],
+    ) -> bool:
+        """Infer shape for NonZero.
+
+        Args:
+            node: An onnx.NodeProto matching the specified search type (i.e., NonZero).
+            input_name_to_nodes: A dict mapping each tensor name to its consuming nodes.
+            output_name_to_node: A dict mapping each tensor name to its producing node.
+
+        Returns:
+            A bool indicating whether the node is updated.
+        """
+        logging.warning(
+            "The model contains a NonZero node, which produces a dynamically shaped output tensor. "
+            "Following QNN requirements, its output shape will be deliberately set to the maximum size."
+        )
+
+        if (input_tensor_type := self.model.get_tensor_type(node.input[0])) is None or (
+            output_tensor_type := self.model.get_tensor_type(node.output[0])
+        ) is None:
+            return False
+
+        if not (input_tensor_shape := self.tensor_shape_to_list(input_tensor_type)):
+            return False
+
+        if not all(isinstance(dim, int) for dim in input_tensor_shape):
+            return False
+
+        output_tensor_type.shape.dim[1].dim_value = int(np.prod(input_tensor_shape))
+        return True
+
+    def apply(self) -> bool:
+        """Apply fusion.
+
+        This method is overridden to execute shape inference again, since NonZero will now have a fixed shape.
+
+        Returns:
+            A bool indicating whether the model is updated.
+        """
+        input_name_to_nodes = self.model.input_name_to_nodes()
+        output_name_to_node = self.model.output_name_to_node()
+
+        updated = False
+        for node in self.model.nodes():
+            if node.op_type == self.search_op_type:
+                updated |= self.fuse(node, input_name_to_nodes, output_name_to_node)
+
+        if updated:
+            self.model.model = onnx.shape_inference.infer_shapes(self.model.model)
+
+        return updated
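Illustration (not part of the patch): exercising the new pass through qnn_preprocess_model. For a model whose graph output derives from NonZero over a [1, 3, 4, 4] input, the output shape becomes [input_rank, total_input_elements] = [4, 48] after preprocessing. File names below are placeholders.

    import onnx
    from onnxruntime.quantization.execution_providers.qnn import qnn_preprocess_model

    modified = qnn_preprocess_model("model.onnx", "model_preprocessed.onnx")
    if modified:
        fixed = onnx.load("model_preprocessed.onnx")
        # Expected [4, 48] for a [1, 3, 4, 4] input feeding NonZero.
        print([d.dim_value for d in fixed.graph.output[0].type.tensor_type.shape.dim])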
diff --git a/onnxruntime/test/providers/qnn/nonzero_op_test.cc b/onnxruntime/test/providers/qnn/nonzero_op_test.cc
new file mode 100644
index 0000000000000..4ef246045bb90
--- /dev/null
+++ b/onnxruntime/test/providers/qnn/nonzero_op_test.cc
@@ -0,0 +1,161 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#if !defined(ORT_MINIMAL_BUILD)
+
+#include <numeric>
+
+#include "gtest/gtest.h"
+
+#include "core/graph/node_attr_utils.h"
+#include "core/graph/onnx_protobuf.h"
+#include "test/providers/qnn/qnn_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+
+template <typename DataType>
+inline GetTestModelFn BuildNonZeroTestCase(const TestInputDef<DataType>& input_def, const bool fix_shape) {
+  return [input_def, fix_shape](ModelTestBuilder& builder) {
+    NodeArg* input = MakeTestInput(builder, input_def);
+
+    NodeArg* output;
+    if (fix_shape) {
+      // Fix NonZero output shape to the maximum possible size.
+      const std::vector<int64_t>& shape = input_def.GetShape();
+      std::vector<int64_t> output_shape;
+      output_shape.push_back(static_cast<int64_t>(shape.size()));
+      output_shape.push_back(std::accumulate(shape.begin(),
+                                             shape.end(),
+                                             static_cast<int64_t>(1),
+                                             std::multiplies<int64_t>()));
+
+      output = builder.MakeOutput(output_shape);
+    } else {
+      output = builder.MakeOutput();
+    }
+
+    builder.AddNode("NonZero", {input}, {output});
+  };
+}
+
+template <typename DataType>
+static void RunNonZeroTestOnCPU(const TestInputDef<DataType>& input_def,
+                                const bool fix_shape,
+                                ExpectedEPNodeAssignment expected_ep_assignment) {
+  ProviderOptions provider_options;
+  provider_options["backend_type"] = "cpu";
+
+  // Note that the fixed-shape NonZero supported by QNN does not align with the ONNX op definition,
+  // so it cannot be executed by the CPU EP.
+  RunQnnModelTest(BuildNonZeroTestCase(input_def, fix_shape),
+                  provider_options,
+                  13,
+                  expected_ep_assignment,
+                  /*fp32_abs_err*/ 1e-5f,
+                  /*log_severity*/ logging::Severity::kERROR,
+                  /*verify_outputs*/ false);
+}
+
+// Test NonZero having static shape, which is supported by QNN.
+TEST_F(QnnCPUBackendTests, NonZero_StaticShape) {
+  RunNonZeroTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                      true,
+                      ExpectedEPNodeAssignment::All);
+}
+
+// Test NonZero having dynamic shape, which is not supported by QNN.
+TEST_F(QnnCPUBackendTests, NonZero_DynamicShape) {
+  RunNonZeroTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                      false,
+                      ExpectedEPNodeAssignment::None);
+}
+
+#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
+//
+// HTP tests:
+//
+
+template <typename QType>
+inline GetTestModelFn BuildQDQNonZeroTestCase(const TestInputDef<float>& input_def, const bool fix_shape) {
+  return [input_def, fix_shape](ModelTestBuilder& builder) {
+    NodeArg* input = MakeTestInput(builder, input_def);
+    QuantParams<QType> input_qparams = GetTestInputQuantParams<QType>(input_def);
+    NodeArg* input_qdq = AddQDQNodePair<QType>(builder,
+                                               input,
+                                               input_qparams.scale,
+                                               input_qparams.zero_point);
+
+    NodeArg* output;
+    if (fix_shape) {
+      // Fix NonZero output shape to the maximum possible size.
+      const std::vector<int64_t>& shape = input_def.GetShape();
+      std::vector<int64_t> output_shape;
+      output_shape.push_back(static_cast<int64_t>(shape.size()));
+      output_shape.push_back(std::accumulate(shape.begin(),
+                                             shape.end(),
+                                             static_cast<int64_t>(1),
+                                             std::multiplies<int64_t>()));
+
+      output = builder.MakeOutput(output_shape);
+    } else {
+      output = builder.MakeOutput();
+    }
+
+    builder.AddNode("NonZero", {input_qdq}, {output});
+  };
+}
+
+template <typename QType>
+static void RunQDQNonZeroTestOnHTP(const TestInputDef<float>& input_def,
+                                   const bool fix_shape,
+                                   ExpectedEPNodeAssignment expected_ep_assignment,
+                                   int opset = 13) {
+  ProviderOptions provider_options;
+  provider_options["backend_type"] = "htp";
+  provider_options["offload_graph_io_quantization"] = "0";
+
+  // Note that the fixed-shape NonZero supported by QNN does not align with the ONNX op definition,
+  // so it cannot be executed by the CPU EP.
+  RunQnnModelTestHTPNoVerify(BuildQDQNonZeroTestCase<QType>(input_def, fix_shape),
+                             provider_options,
+                             opset,
+                             expected_ep_assignment);
+}
+
+// Test 8-bit NonZero having static shape, which is supported by QNN.
+TEST_F(QnnHTPBackendTests, NonZero_U8_StaticShape) {
+  RunQDQNonZeroTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                                  true,
+                                  ExpectedEPNodeAssignment::All);
+}
+
+// Test 8-bit NonZero having dynamic shape, which is not supported by QNN.
+TEST_F(QnnHTPBackendTests, NonZero_U8_DynamicShape) {
+  RunQDQNonZeroTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                                  false,
+                                  ExpectedEPNodeAssignment::None);
+}
+
+// Test 16-bit NonZero having static shape, which is supported by QNN.
+TEST_F(QnnHTPBackendTests, NonZero_U16_StaticShape) {
+  RunQDQNonZeroTestOnHTP<uint16_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                                   true,
+                                   ExpectedEPNodeAssignment::All,
+                                   21);
+}
+
+// Test 16-bit NonZero having dynamic shape, which is not supported by QNN.
+TEST_F(QnnHTPBackendTests, NonZero_U16_DynamicShape) {
+  RunQDQNonZeroTestOnHTP<uint16_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
+                                   false,
+                                   ExpectedEPNodeAssignment::None,
+                                   21);
+}
+
+#endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
+
+}  // namespace test
+}  // namespace onnxruntime
+
+#endif  // !defined(ORT_MINIMAL_BUILD)

diff --git a/onnxruntime/test/python/quantization/test_qnn_preprocess_model.py b/onnxruntime/test/python/quantization/test_qnn_preprocess_model.py
index 7e0a8496b8bfb..da3170ebdcb14 100644
--- a/onnxruntime/test/python/quantization/test_qnn_preprocess_model.py
+++ b/onnxruntime/test/python/quantization/test_qnn_preprocess_model.py
@@ -266,5 +266,56 @@ def test_make_io_channel_last_rank_error(self):
         self.assertIn("to be of rank >= 3", str(context.exception))
 
 
+class TestQNNPreprocessBase(unittest.TestCase):
+    """Test base class for QNN preprocess."""
+
+    __test__ = False
+
+    def setUp(self):
+        """Set up."""
+        self._model_path = Path("model.onnx")
+        self._preprocessed_model_path = Path("model_preprocessed.onnx")
+
+    def tearDown(self):
+        """Tear down."""
+        if self._model_path.exists():
+            self._model_path.unlink()
+        if self._preprocessed_model_path.exists():
+            self._preprocessed_model_path.unlink()
+
+
+class TestShapeNonZero(TestQNNPreprocessBase):
+    """Test the ShapeNonZero preprocess."""
+
+    def test_basic(self):
+        """Test the basic case."""
+
+        def build_model():
+            """Build the test model."""
+            input_ = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [1, 3, 4, 4])
+            output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.INT64, None)
+
+            nonzero_node = onnx.helper.make_node("NonZero", ["input"], ["nonzero_output"])
+            transpose_node = onnx.helper.make_node("Transpose", ["nonzero_output"], ["output"], perm=[0, 1])
+
+            graph = onnx.helper.make_graph([nonzero_node, transpose_node], "model", inputs=[input_], outputs=[output])
+            model = onnx.helper.make_model(graph)
+            return onnx.shape_inference.infer_shapes(model)
+
+        onnx.save_model(build_model(), self._model_path)
+
+        modified = qnn_preprocess_model(self._model_path, self._preprocessed_model_path)
+        self.assertTrue(modified)
+
+        preprocessed_model = onnx.load(self._preprocessed_model_path)
+
+        def get_shape(vi):
+            """Get the shape of a value info."""
+            return [dim.dim_value for dim in vi.type.tensor_type.shape.dim]
+
+        self.assertEqual(get_shape(preprocessed_model.graph.value_info[0]), [4, 48])
+        self.assertEqual(get_shape(preprocessed_model.graph.output[0]), [4, 48])
+
+
 if __name__ == "__main__":
     unittest.main()
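Illustration (not part of the patch): a sketch of running the preprocessed model on the QNN EP, assuming a QNN-enabled onnxruntime build. The "backend_type" provider option mirrors the one used by the tests above; model, input, and output names are placeholders.

    import numpy as np
    import onnxruntime as ort

    # Assumes a QNN-enabled onnxruntime build and an HTP-capable device.
    session = ort.InferenceSession(
        "model_preprocessed.onnx",
        providers=[("QNNExecutionProvider", {"backend_type": "htp"})],
    )
    x = np.random.randn(1, 3, 4, 4).astype(np.float32)
    (indices,) = session.run(None, {"input": x})
    # (4, 48): the maximum possible size; only the detected nonzero entries are meaningful.
    print(indices.shape)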