Commit b198f48

[QNN EP] Add QNN Execution Provider Tool to Samples
1. Add qnn_ep_tool, which leverages QnnCpu.dll and QnnHtp.dll for inference.
2. Add onnxruntime_qnn_ep_tool.cmake to build the qnn_ep_tool.
3. Modify cmake/CMakeLists.txt and tools/ci_build/build.py so the tool is built when the flag onnxruntime_BUILD_QNN_EP_TOOL=ON is set.
4. The qnn_ep_tool supports the .pb and .raw formats for input data and output results.
Parent: fe7634e

File tree: 9 files changed, +686 −0 lines

cmake/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
```diff
@@ -88,6 +88,7 @@ option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
 option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
 option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
 option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
+option(onnxruntime_BUILD_QNN_EP_TOOL "Build ONNXRuntime qnn-ep-tool" OFF)
 option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
 option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)
 option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to provide eigen_SOURCE_PATH if turn this on." OFF)
@@ -1842,6 +1843,10 @@ if (onnxruntime_BUILD_UNIT_TESTS)
   list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_unittests)
 endif()
 
+if (onnxruntime_BUILD_QNN_EP_TOOL)
+  list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_qnn_ep_tool)
+endif()
+
 if (onnxruntime_BUILD_WINML_TESTS)
   list(APPEND ONNXRUNTIME_CMAKE_FILES winml_unittests)
 endif()
```
cmake/onnxruntime_qnn_ep_tool.cmake

Lines changed: 12 additions & 0 deletions
```cmake
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

set(QNN_EP_TOOL_SRC_DIR ${REPO_ROOT}/samples/qnn_ep_tool)
onnxruntime_add_executable(
  qnn_ep_tool
  ${QNN_EP_TOOL_SRC_DIR}/main.cpp
  ${QNN_EP_TOOL_SRC_DIR}/utils.cpp
  ${QNN_EP_TOOL_SRC_DIR}/model_info.cpp
)
include_directories(${QNN_EP_TOOL_SRC_DIR})
target_link_libraries(qnn_ep_tool onnxruntime onnx)
```
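
The target links against `onnx` because the .pb inputs and outputs are serialized `onnx::TensorProto` messages. As a hedged illustration (not code from this commit; the include path and the 1x3x224x224 float input are assumptions), writing an `input_0.pb` looks roughly like this:

```cpp
#include <fstream>
#include <vector>

#include "onnx/onnx_pb.h"  // assumed header path for the generated ONNX protobuf classes

int main() {
  // Fill a dummy 1x3x224x224 float tensor and serialize it as a TensorProto.
  std::vector<float> data(1 * 3 * 224 * 224, 0.5f);
  onnx::TensorProto tp;
  tp.set_data_type(onnx::TensorProto::FLOAT);
  for (int64_t d : {1, 3, 224, 224}) tp.add_dims(d);
  tp.set_raw_data(data.data(), data.size() * sizeof(float));
  std::ofstream f("input_0.pb", std::ios::binary);
  tp.SerializeToOstream(&f);
  return 0;
}
```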

samples/qnn_ep_tool/README.md

Lines changed: 46 additions & 0 deletions

<!-- Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License. -->

# ONNX Runtime Qualcomm Neural Network Execution Provider Tool (QNN EP Tool)
- The tool runs ONNX Runtime session inference with the QNN Execution Provider on the given inputs and saves the outputs.
- Inputs and outputs can be in .pb or .raw format.

## Model and Input Data Directory Structure
The tool expects the ONNX model and input data to be arranged in the following directory structure:
```bash
resnet18-v1-7
├── resnet18-v1-7.onnx
├── test_data_set_0
│   └── input_0.pb (or input_0.raw)
└── test_data_set_1
    └── input_0.pb (or input_0.raw)
```
In each test_data_set_X/:
1. If only .pb input data is provided, the tool uses the .pb files as input.
2. If only .raw input data is provided, the tool uses the .raw files as input.
3. If both are provided (not recommended), the tool prioritizes .pb; see the sketch after this list.

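The priority rule above amounts to a simple directory scan. The committed utils.cpp is not shown in this view, so the function below is only an editorial sketch (`check_data_format_sketch` is a hypothetical name; the committed utils.hpp declares `check_data_format`):

```cpp
#include <filesystem>
#include <string>

// Illustrative sketch of the .pb-over-.raw priority rule; not the committed code.
std::string check_data_format_sketch(const std::filesystem::path& test_data_set_dir) {
  bool has_pb = false;
  bool has_raw = false;
  for (const auto& entry : std::filesystem::directory_iterator(test_data_set_dir)) {
    const auto ext = entry.path().extension();
    if (ext == ".pb") has_pb = true;
    if (ext == ".raw") has_raw = true;
  }
  if (has_pb) return "pb";    // rules 1 and 3: .pb wins whenever present
  if (has_raw) return "raw";  // rule 2
  return "";                  // no recognized input data
}
```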
## Build
The tool can be built with the flag `--build_qnn_ep_tool` set:
```cmd
.\build.bat --config RelWithDebInfo --build_shared_lib --parallel --compile_no_warning_as_error --cmake_generator "Visual Studio 17 2022" --use_qnn --qnn_home <path-to-qnn-sdk> --build_qnn_ep_tool
```

## Command Line Usage
1. The following command serves as an example of running the tool:
```ps1
# qnn_ep_tool.exe <model_dir> <backend_path>
.\qnn_ep_tool.exe resnet18-v1-7 QnnCpu.dll
```

2. The tool produces .pb / .raw outputs under the corresponding directory:
```bash
resnet18-v1-7
├── resnet18-v1-7.onnx
├── test_data_set_0
│   ├── input_0.pb (input_0.raw)
│   └── out_0.pb (out_0.raw)
└── test_data_set_1
    ├── input_0.pb (input_0.raw)
    └── out_0.pb (out_0.raw)
```
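
To verify a dumped output, an `out_0.raw` can be read back directly. This assumes the .raw dumps are raw little-endian float32 tensor bytes, consistent with the `std::vector<std::vector<float>>` buffers used in main.cpp; treat it as a hedged sketch, not a format specification:

```cpp
#include <fstream>
#include <iostream>
#include <vector>

int main() {
  // Read a dumped output tensor back; the element count should match the model's output shape.
  std::ifstream f("resnet18-v1-7/test_data_set_0/out_0.raw", std::ios::binary | std::ios::ate);
  if (!f) { std::cerr << "cannot open out_0.raw" << std::endl; return 1; }
  const std::streamsize n_bytes = f.tellg();
  f.seekg(0);
  std::vector<float> out(static_cast<size_t>(n_bytes) / sizeof(float));
  f.read(reinterpret_cast<char*>(out.data()), n_bytes);
  std::cout << "read " << out.size() << " float32 values; first = " << out[0] << std::endl;
  return 0;
}
```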
samples/qnn_ep_tool/include/model_info.hpp

Lines changed: 46 additions & 0 deletions
```cpp
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <onnxruntime_cxx_api.h>

#include <vector>

class OnnxModelInfo {
 public:
  OnnxModelInfo(const OrtApi* g_ort, const OrtSession* session, OrtAllocator* allocator);
  size_t get_num_in_tensors();
  std::vector<char*> get_in_tensor_names();
  std::vector<std::vector<int64_t>> get_in_tensor_dims();
  std::vector<ONNXTensorElementDataType> get_in_tensor_element_types();
  std::vector<int64_t> get_in_tensor_element_nums();
  std::vector<OrtValue*>& get_in_tensors();

  size_t get_num_out_tensors();
  std::vector<char*> get_out_tensor_names();
  std::vector<std::vector<int64_t>> get_out_tensor_dims();
  std::vector<ONNXTensorElementDataType> get_out_tensor_element_types();
  std::vector<int64_t> get_out_tensor_element_nums();
  std::vector<OrtValue*>& get_out_tensors();

  void release_ort_values(const OrtApi* g_ort);
  void PrintOnnxModelInfo();

 private:
  size_t num_in_tensors;
  std::vector<char*> in_tensor_names;
  std::vector<std::vector<int64_t>> in_tensor_dims;
  std::vector<ONNXTensorElementDataType> in_tensor_element_types;
  std::vector<int64_t> in_tensor_element_nums;
  std::vector<OrtValue*> in_tensors;

  size_t num_out_tensors;
  std::vector<char*> out_tensor_names;
  std::vector<std::vector<int64_t>> out_tensor_dims;
  std::vector<ONNXTensorElementDataType> out_tensor_element_types;
  std::vector<int64_t> out_tensor_element_nums;
  std::vector<OrtValue*> out_tensors;
};

size_t GetONNXTypeSize(ONNXTensorElementDataType dtype);
int onnx_element_type_to_tensorproto_dtype(ONNXTensorElementDataType dtype);
```
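
`GetONNXTypeSize` is declared here but defined in model_info.cpp, which this section does not show. A plausible sketch of the mapping, covering only common element types (an assumption, not the committed implementation):

```cpp
#include <onnxruntime_cxx_api.h>

#include <cstddef>

// Sketch: byte width per ONNX tensor element type; unknown types map to 0.
size_t GetONNXTypeSizeSketch(ONNXTensorElementDataType dtype) {
  switch (dtype) {
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:    return 1;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: return 2;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:   return 4;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:   return 8;
    default:                                    return 0;
  }
}
```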
samples/qnn_ep_tool/include/utils.hpp

Lines changed: 46 additions & 0 deletions
```cpp
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <onnxruntime_cxx_api.h>

#include <filesystem>  // NOLINT
#include <string>
#include <vector>

#include "core/platform/path_lib.h"

#include "include/model_info.hpp"

std::basic_string<PATH_CHAR_TYPE> find_model_path(std::string model_dir);

std::vector<std::basic_string<PATH_CHAR_TYPE>> find_test_data_sets(std::string model_dir);

std::string check_data_format(const std::filesystem::path test_data_set_dir);

void load_input_tensors_from_raws(
    std::filesystem::path inp_dir,
    const OrtApi* g_ort,
    OnnxModelInfo* model_info,
    std::vector<std::vector<float>>* input_data
);

void dump_output_tensors_to_raws(
    std::filesystem::path out_dir,
    const OrtApi* g_ort,
    OnnxModelInfo* model_info
);

void load_input_tensors_from_pbs(
    std::filesystem::path inp_dir,
    const OrtApi* g_ort,
    OnnxModelInfo* model_info,
    std::vector<std::vector<float>>* input_data
);

void dump_output_tensors_to_pbs(
    std::filesystem::path out_dir,
    const OrtApi* g_ort,
    OnnxModelInfo* model_info
);
```
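
`load_input_tensors_from_raws` is only declared here; its definition lives in utils.cpp, which this section does not show. A minimal sketch of the core step, assuming raw float32 files and using only ORT C API calls that exist (`CreateCpuMemoryInfo`, `CreateTensorWithDataAsOrtValue`); the helper name and signature are illustrative:

```cpp
#include <onnxruntime_cxx_api.h>

#include <fstream>
#include <vector>

// Sketch: read one raw float32 file and wrap it (without copying) in an OrtValue.
// `storage` must outlive the returned OrtValue, since the tensor borrows its buffer.
OrtValue* raw_file_to_ort_value(const OrtApi* g_ort, const char* path,
                                const std::vector<int64_t>& dims,
                                std::vector<float>* storage) {
  std::ifstream f(path, std::ios::binary | std::ios::ate);
  const std::streamsize n_bytes = f.tellg();
  f.seekg(0);
  storage->resize(static_cast<size_t>(n_bytes) / sizeof(float));
  f.read(reinterpret_cast<char*>(storage->data()), n_bytes);

  OrtMemoryInfo* mem_info = nullptr;
  g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &mem_info);
  OrtValue* value = nullptr;
  g_ort->CreateTensorWithDataAsOrtValue(
      mem_info, storage->data(), storage->size() * sizeof(float),
      dims.data(), dims.size(),
      ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &value);
  g_ort->ReleaseMemoryInfo(mem_info);
  return value;
}
```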

samples/qnn_ep_tool/main.cpp

Lines changed: 114 additions & 0 deletions
```cpp
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <onnxruntime_cxx_api.h>

#include <filesystem>  // NOLINT
#include <iostream>
#include <string>
#include <vector>

#include "core/platform/path_lib.h"
#include "include/utils.hpp"

int main(int argc, char* argv[]) {
  if (argc < 3) {
    std::cout << "Usage: qnn_ep_tool <model_dir> <backend_path>" << std::endl;
    return 1;
  }
  std::string model_dir(argv[1]);
  std::cout << model_dir << std::endl;
  std::basic_string<PATH_CHAR_TYPE> model_path = find_model_path(model_dir);
  if (model_path.size() <= 0) {
    std::cout << ".onnx model should be provided" << std::endl;
    return 1;
  }

  // Model found; set up the ORT environment and session options.
  std::cout << "[Successfully Load Model] " << std::endl;
  const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
  std::cout << "[ORT_API_VERSION] " << ORT_API_VERSION << std::endl;
  OrtEnv* env;
  g_ort->CreateEnv(ORT_LOGGING_LEVEL_VERBOSE, "test", &env);
  OrtSessionOptions* session_options;
  g_ort->CreateSessionOptions(&session_options);
  g_ort->SetIntraOpNumThreads(session_options, 1);
  g_ort->SetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC);

  std::string backend_path(argv[2]);
  if (!std::filesystem::exists(std::filesystem::path(backend_path))) {
    std::cout << "Not Found: " << backend_path << std::endl;
    return 1;
  }
  // Point the QNN EP at the chosen backend library (e.g. QnnCpu.dll or QnnHtp.dll).
  std::vector<const char*> options_keys = {"backend_path"};
  std::vector<const char*> options_values = {backend_path.c_str()};

  g_ort->SessionOptionsAppendExecutionProvider(
      session_options, "QNN",
      options_keys.data(), options_values.data(), options_keys.size());

  OrtSession* session;
  g_ort->CreateSession(env, model_path.c_str(), session_options, &session);
  std::cout << "[Successfully CreateSession]" << std::endl;

  OrtAllocator* allocator;
  g_ort->GetAllocatorWithDefaultOptions(&allocator);

  OnnxModelInfo model_info(g_ort, session, allocator);
  model_info.PrintOnnxModelInfo();

  // Run inference once per test_data_set_X directory.
  std::vector<std::basic_string<PATH_CHAR_TYPE>> test_data_sets = find_test_data_sets(model_dir);
  std::cout << "test_data_sets.size() " << test_data_sets.size() << std::endl;
  for (size_t idx = 0; idx < test_data_sets.size(); idx++) {
    std::cout << "---- test_data_set_" << idx << " ----" << std::endl;
    std::vector<std::vector<float>> input_data;
    auto test_data_set_dir = std::filesystem::path(test_data_sets[idx]);
    auto data_format = check_data_format(test_data_set_dir);
    if (data_format == "pb") {
      std::cout << "[test_data_sets_" << idx << "] " << "Loading .pb" << std::endl;
      load_input_tensors_from_pbs(test_data_set_dir, g_ort, &model_info, &input_data);
    } else if (data_format == "raw") {
      std::cout << "[test_data_sets_" << idx << "] " << "Loading .raw" << std::endl;
      load_input_tensors_from_raws(test_data_set_dir, g_ort, &model_info, &input_data);
    }
    std::cout << "[test_data_sets_" << idx << "] " << "Successfully Load Inputs" << std::endl;
    g_ort->Run(
        session,
        nullptr,
        model_info.get_in_tensor_names().data(),
        (const OrtValue* const*)model_info.get_in_tensors().data(),
        model_info.get_in_tensors().size(),
        model_info.get_out_tensor_names().data(),
        model_info.get_out_tensor_names().size(),
        model_info.get_out_tensors().data());
    std::cout << "[test_data_sets_" << idx << "] " << "Successfully Inference" << std::endl;
    if (data_format == "pb") {
      std::cout << "[test_data_sets_" << idx << "] " << "Dumping .pb" << std::endl;
      dump_output_tensors_to_pbs(test_data_set_dir, g_ort, &model_info);
    } else if (data_format == "raw") {
      std::cout << "[test_data_sets_" << idx << "] " << "Dumping .raw" << std::endl;
      dump_output_tensors_to_raws(test_data_set_dir, g_ort, &model_info);
    }
    std::cout << "[test_data_sets_" << idx << "] " << "Successfully Save Outputs" << std::endl;
    model_info.release_ort_values(g_ort);
    std::cout << "[test_data_sets_" << idx << "] " << "Successfully Release OrtValue" << std::endl;
  }
  return 0;
}
```
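
Note that main.cpp discards the `OrtStatus*` returned by every C API call, so failures in `CreateEnv`, `CreateSession`, or `Run` pass silently. A small illustrative wrapper (not part of this commit) shows the usual pattern:

```cpp
#include <onnxruntime_cxx_api.h>

#include <cstdlib>
#include <iostream>

// Abort with the ORT error message if a C API call failed; illustrative helper.
void CheckStatus(const OrtApi* g_ort, OrtStatus* status) {
  if (status != nullptr) {
    std::cerr << "ORT error: " << g_ort->GetErrorMessage(status) << std::endl;
    g_ort->ReleaseStatus(status);
    std::exit(1);
  }
}

// Example: CheckStatus(g_ort, g_ort->CreateSession(env, model_path.c_str(), session_options, &session));
```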
