Skip to content

Commit a444857

Browse files
committed
Enhance the QNNContextProc
1 parent fb765f7 commit a444857

File tree

12 files changed

+251
-68
lines changed

12 files changed

+251
-68
lines changed

BUILD.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
- Install Visual Studio 2022:
2121
- https://docs.qualcomm.com/bundle/publicresource/topics/80-62010-1/setup.html?product=1601111740057789
2222
- Install x64 version [Python-3.12.8](https://www.python.org/ftp/python/3.12.8/python-3.12.8-amd64.exe) or install arm64 version [Python-3.12.6](https://github.com/quic/ai-engine-direct-helper/blob/main/docs/python_arm64.md) if your app is running on arm64.
23-
-
23+
2424
- Use the commands below to install Python dependency:
2525
```
2626
pip install wheel==0.45.1 setuptools==75.8.0 pybind11==2.13.6
@@ -45,7 +45,7 @@ Set QNN_SDK_ROOT=C:\Qualcomm\AIStack\QAIRT\2.42.0.251225\
4545
cd ai-engine-direct-helper
4646
python setup.py --toolchains <Supported Toolchains> --hexagonarch <Hexagon Arch> bdist_wheel
4747

48-
#for example:
48+
# For example:
4949
python setup.py --toolchains arm64x-windows-msvc --hexagonarch 73 bdist_wheel
5050

5151
# If you use below command, it will compile with default Toolchains and Hexagon Arch.

pybind/AppBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
ShareMemory::ShareMemory(const std::string& share_memory_name, const size_t share_memory_size) {
1515
m_share_memory_name = share_memory_name;
16+
m_share_memory_size = share_memory_size;
1617
g_LibAppBuilder.CreateShareMemory(share_memory_name, share_memory_size);
1718
}
1819

pybind/AppBuilder.h

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,33 @@ static inline py::dtype dtypeFromString(const std::string& dtypeStr) {
7070
return py::dtype::of<uint8_t>();
7171
}
7272

73+
// ---------------------------------------------------------------------------------
74+
// Helper: case-insensitive "float32 request" for input_data_type/output_data_type
75+
// Accepts: "float", "float32", "fp32"
76+
// ---------------------------------------------------------------------------------
77+
static inline bool isFloat32Request(const std::string& s) {
78+
std::string t = s;
79+
for (auto& c : t) c = static_cast<char>(::tolower(c));
80+
return (t == "float" || t == "float32" || t == "fp32");
81+
}
82+
83+
// ---------------------------------------------------------------------------------
84+
// Helper: identify if a py::dtype is float32 (NumPy kind 'f' and itemsize == 4)
85+
// Note: We avoid relying on dtype object identity; use kind/itemsize instead.
86+
// ---------------------------------------------------------------------------------
87+
static inline bool isNumpyFloat32Dtype(const py::dtype& dt) {
88+
try {
89+
// dt.kind is a 1-char string in NumPy, e.g. 'f' for floating
90+
std::string kindStr = py::str(dt.attr("kind"));
91+
char kind = kindStr.empty() ? '\0' : kindStr[0];
92+
py::ssize_t itemsize = dt.attr("itemsize").cast<py::ssize_t>();
93+
return (kind == 'f' && itemsize == 4);
94+
} catch (...) {
95+
// conservative fallback
96+
return false;
97+
}
98+
}
99+
73100
// ---------------------------------------------------------------------------
74101
// Helper: product of dims (for output element count)
75102
// ---------------------------------------------------------------------------
@@ -206,7 +233,8 @@ std::vector<py::array> inference(std::string model_name, const std::vector<py::a
206233

207234
// Keep temporary converted/contiguous arrays alive during ModelInference
208235
std::vector<py::array> keepAlive;
209-
const bool floatMode = (input_data_type == "float");
236+
const bool floatMode = isFloat32Request(input_data_type);
237+
const bool floatOutMode = isFloat32Request(output_data_type);
210238

211239
//QNN_INF("inference input vector length: %d\n", input.size());
212240

@@ -271,7 +299,16 @@ std::vector<py::array> inference(std::string model_name, const std::vector<py::a
271299
{ static_cast<py::ssize_t>(dt.itemsize()) },
272300
outputBuffers[i],
273301
free_data);
274-
output.push_back(result);
302+
303+
// If user requests float output, cast to float32 before returning.
304+
// IMPORTANT: do NOT reinterpret the raw buffer as float32 (size may not match).
305+
// We first create 'result' using the inferred real dtype, then cast (copy) if needed.
306+
if (floatOutMode && !isNumpyFloat32Dtype(dt)) {
307+
py::array_t<float, py::array::c_style | py::array::forcecast> farr(result);
308+
output.push_back(py::array(farr));
309+
} else {
310+
output.push_back(result);
311+
}
275312
}
276313
//print_time("convert Data To ArrayV");
277314

@@ -288,7 +325,8 @@ std::vector<py::array> inference_P(std::string model_name, std::string proc_name
288325

289326
// Keep temporary converted/contiguous arrays alive during ModelInference
290327
std::vector<py::array> keepAlive;
291-
const bool floatMode = (input_data_type == "float");
328+
const bool floatMode = isFloat32Request(input_data_type);
329+
const bool floatOutMode = isFloat32Request(output_data_type);
292330

293331
for (auto i = 0; i < input.size(); i++) {
294332
if (floatMode) {
@@ -351,7 +389,15 @@ std::vector<py::array> inference_P(std::string model_name, std::string proc_name
351389
{ static_cast<py::ssize_t>(dt.itemsize()) },
352390
outputBuffers[i],
353391
free_data);
354-
output.push_back(result);
392+
393+
// If user requests float output, cast to float32 before returning.
394+
// For shared memory outputs, this will create a float32 copy (shared memory remains untouched).
395+
if (floatOutMode && !isNumpyFloat32Dtype(dt)) {
396+
py::array_t<float, py::array::c_style | py::array::forcecast> farr(result);
397+
output.push_back(py::array(farr));
398+
} else {
399+
output.push_back(result);
400+
}
355401
}
356402
//print_time("convert Data To ArrayV");
357403

@@ -371,6 +417,7 @@ int delete_memory(std::string share_memory_name) {
371417
class ShareMemory {
372418
public:
373419
std::string m_share_memory_name;
420+
size_t m_share_memory_size = 0;
374421

375422
ShareMemory(const std::string& share_memory_name, const size_t share_memory_size);
376423
~ShareMemory();

script/qai_appbuilder/qnncontext.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,6 @@ def __init__(self,
222222
model_path: str = "None",
223223
backend_lib_path: str = "None",
224224
system_lib_path: str = "None",
225-
runtime: str = Runtime.HTP,
226225
is_async: bool = False,
227226
input_data_type: str = DataType.FLOAT,
228227
output_data_type: str = DataType.FLOAT
@@ -258,7 +257,6 @@ def __init__(self,
258257
model_path: str = "None",
259258
backend_lib_path: str = "None",
260259
system_lib_path: str = "None",
261-
runtime: str = Runtime.HTP,
262260
is_async: bool = False,
263261
input_data_type: str = DataType.FLOAT,
264262
output_data_type: str = DataType.FLOAT
@@ -300,7 +298,6 @@ def __init__(self,
300298
backend_lib_path: str = "None",
301299
system_lib_path: str = "None",
302300
lora_adapters=None,
303-
runtime: str = Runtime.HTP,
304301
is_async: bool = False,
305302
input_data_type: str = DataType.FLOAT,
306303
output_data_type: str = DataType.FLOAT
@@ -355,6 +352,7 @@ def __init__(self,
355352
"""
356353
self.share_memory_name = share_memory_name
357354
self.m_memory = appbuilder.ShareMemory(share_memory_name, share_memory_size)
355+
self.share_memory_size = share_memory_size
358356

359357
#@timer
360358
def __del__(self):

setup.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ def build_clean():
141141
if os.path.exists(binary_path + "/QAIAppSvc.exe"):
142142
os.remove(binary_path + "/libappbuilder.dll")
143143
os.remove(binary_path + "/QAIAppSvc.exe")
144+
if os.path.exists(binary_path + "/QAIAppSvc.pdb"):
145+
os.remove(binary_path + "/QAIAppSvc.pdb")
144146
if os.path.exists(binary_path + "/libappbuilder.pdb"):
145147
os.remove(binary_path + "/libappbuilder.pdb")
146148
if os.path.exists(binary_path + "/libappbuilder.so"):
@@ -164,6 +166,8 @@ def build_cmake():
164166
if os.path.exists("lib/" + CONFIG + "/QAIAppSvc.exe"):
165167
shutil.copy("lib/" + CONFIG +"/libappbuilder.dll", binary_path)
166168
shutil.copy("lib/" + CONFIG + "/QAIAppSvc.exe", binary_path)
169+
if os.path.exists("lib/" + CONFIG + "/QAIAppSvc.pdb"):
170+
shutil.copy("lib/" + CONFIG + "/QAIAppSvc.pdb", binary_path)
167171
if os.path.exists("lib/" + CONFIG + "/libappbuilder.pdb"):
168172
shutil.copy("lib/" + CONFIG + "/libappbuilder.pdb", binary_path)
169173
if os.path.exists("lib/" + "libappbuilder.so"):

src/LibAppBuilder.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ bool ModelInitializeEx(const std::string& model_name, const std::string& proc_na
465465
bool ModelInferenceEx(std::string model_name, std::string proc_name, std::string share_memory_name,
466466
std::vector<uint8_t*>& inputBuffers, std::vector<size_t>& inputSize,
467467
std::vector<uint8_t*>& outputBuffers, std::vector<size_t>& outputSize,
468-
std::string& perfProfile, size_t graphIndex) {
468+
std::string& perfProfile, size_t graphIndex, size_t share_memory_size=0) {
469469
bool result = true;
470470

471471
//QNN_INF("LibAppBuilder::ModelInference: %s \n", model_name.c_str());
@@ -487,7 +487,7 @@ bool ModelInferenceEx(std::string model_name, std::string proc_name, std::string
487487
result = false;
488488
}
489489

490-
if (result && sample_app::StatusCode::SUCCESS != app->executeGraphsBuffers(inputBuffers, outputBuffers, outputSize, perfProfile, graphIndex)) {
490+
if (result && sample_app::StatusCode::SUCCESS != app->executeGraphsBuffers(inputBuffers, outputBuffers, outputSize, perfProfile, graphIndex, share_memory_size)) {
491491
app->reportError("Graph Execution failure");
492492
result = false;
493493
}
@@ -599,9 +599,9 @@ bool LibAppBuilder::ModelInference(std::string model_name, std::string proc_name
599599

600600
bool LibAppBuilder::ModelInference(std::string model_name, std::vector<uint8_t*>& inputBuffers,
601601
std::vector<uint8_t*>& outputBuffers, std::vector<size_t>& outputSize,
602-
std::string& perfProfile, size_t graphIndex){
602+
std::string& perfProfile, size_t graphIndex, size_t share_memory_size){
603603
std::vector<size_t> inputSize;
604-
return ModelInferenceEx(model_name, "", "", inputBuffers, inputSize, outputBuffers, outputSize, perfProfile, graphIndex);
604+
return ModelInferenceEx(model_name, "", "", inputBuffers, inputSize, outputBuffers, outputSize, perfProfile, graphIndex, share_memory_size);
605605
}
606606

607607
bool LibAppBuilder::ModelApplyBinaryUpdate(const std::string model_name, std::vector<LoraAdapter>& lora_adapters) {

src/LibAppBuilder.hpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class LIBAPPBUILDER_API LibAppBuilder
6363

6464
bool ModelInference(std::string model_name, std::vector<uint8_t*>& inputBuffers,
6565
std::vector<uint8_t*>& outputBuffers, std::vector<size_t>& outputSize,
66-
std::string& perfProfile, size_t graphIndex = 0);
66+
std::string& perfProfile, size_t graphIndex = 0, size_t share_memory_size = 0);
6767
bool ModelInference(std::string model_name, std::string proc_name, std::string share_memory_name,
6868
std::vector<uint8_t*>& inputBuffers, std::vector<size_t>& inputSize,
6969
std::vector<uint8_t*>& outputBuffers, std::vector<size_t>& outputSize,
@@ -77,34 +77,33 @@ class LIBAPPBUILDER_API LibAppBuilder
7777
bool CreateShareMemory(std::string share_memory_name, size_t share_memory_size);
7878
bool DeleteShareMemory(std::string share_memory_name);
7979

80-
// issue#24
8180
std::vector<std::vector<size_t>> getInputShapes(std::string model_name);
8281
std::vector<std::string> getInputDataType(std::string model_name);
8382
std::vector<std::string> getOutputDataType(std::string model_name);
8483
std::vector<std::vector<size_t>> getOutputShapes(std::string model_name);
8584
std::string getGraphName(std::string model_name);
8685
std::vector<std::string> getInputName(std::string model_name);
8786
std::vector<std::string> getOutputName(std::string model_name);
88-
ModelInfo_t getModelInfo(std::string model_name, std::string proc_name, std::string input);
89-
ModelInfo_t getModelInfo(std::string model_name, std::string input);
90-
ModelInfo_t getModelInfoExt(std::string model_name, std::string input);
91-
//proc
87+
9288
std::vector<std::vector<size_t>> getInputShapes(std::string model_name, std::string proc_name);
9389
std::vector<std::string> getInputDataType(std::string model_name, std::string proc_name);
94-
std::vector<std::string> getInputName(std::string model_name, std::string proc_name);
95-
std::string getGraphName(std::string model_name, std::string proc_name);
9690
std::vector<std::string> getOutputDataType(std::string model_name, std::string proc_name);
9791
std::vector<std::vector<size_t>> getOutputShapes(std::string model_name, std::string proc_name);
92+
std::string getGraphName(std::string model_name, std::string proc_name);
93+
std::vector<std::string> getInputName(std::string model_name, std::string proc_name);
9894
std::vector<std::string> getOutputName(std::string model_name, std::string proc_name);
99-
// issue#24
95+
96+
ModelInfo_t getModelInfo(std::string model_name, std::string input);
97+
ModelInfo_t getModelInfo(std::string model_name, std::string proc_name, std::string input);
98+
ModelInfo_t getModelInfoExt(std::string model_name, std::string input);
99+
100100
std::vector<std::vector<size_t>> m_inputShapes;
101101
std::vector<std::string> m_inputDataType;
102102
std::vector<std::vector<size_t>> m_outputShapes;
103103
std::vector<std::string> m_outputDataType;
104104
std::string m_graphName;
105105
std::vector<std::string> m_inputName;
106106
std::vector<std::string> m_outputName;
107-
108107
};
109108

110109

0 commit comments

Comments (0)