
Commit a7ae9ae

[QNN-EP] Add GPU to QNN EP factory (#26085)
## Description

Make the GPU backend visible to users of the WinML API. The V2 EP selection mechanism will now expose the GPU device for QNN. The GPU can be picked when:

* The device list contains only GPU
* The execution policy is set to PREFER_GPU

If multiple devices (e.g. HTP and GPU) are provided to `AppendExecutionProviders_V2`, whichever device was provided last in the list will be used.

## Motivation and Context

Required to enable WinML usage with the QNN GPU backend.
1 parent d3a916d commit a7ae9ae
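
As a usage illustration of the description above (not part of the commit), the following minimal sketch mirrors the `AutoEp_PreferGpu` test added in `qnn_basic_test.cc` below; the model path is a placeholder.

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env;

  // Register the QNN EP plugin library so its factory (now reporting NPU and GPU devices) is visible.
  Ort::ThrowOnError(Ort::GetApi().RegisterExecutionProviderLibrary(
      env, "QNNExecutionProvider", ORT_TSTR("onnxruntime_providers_qnn.dll")));

  Ort::SessionOptions so;
  // V2 EP selection: prefer a GPU device; with this change the QNN GPU backend is a candidate.
  so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);

  Ort::Session session(env, ORT_TSTR("model.onnx"), so);  // placeholder model path
  return 0;
}
```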

2 files changed: +43, -35 lines


onnxruntime/core/providers/qnn/qnn_provider_factory.cc

Lines changed: 29 additions & 35 deletions
```diff
@@ -82,15 +82,11 @@ struct QNN_Provider : Provider {
 
   Status CreateIExecutionProvider(const OrtHardwareDevice* const* /*devices*/,
                                   const OrtKeyValuePairs* const* /*ep_metadata*/,
-                                  size_t num_devices,
+                                  size_t /*num_devices*/,
                                   ProviderOptions& provider_options,
                                   const OrtSessionOptions& session_options,
                                   const OrtLogger& logger,
                                   std::unique_ptr<IExecutionProvider>& ep) override {
-    if (num_devices != 1) {
-      return Status(common::ONNXRUNTIME, ORT_EP_FAIL, "QNN EP only supports one device.");
-    }
-
     const ConfigOptions* config_options = &session_options.GetConfigOptions();
 
     std::array<const void*, 2> configs_array = {&provider_options, config_options};
@@ -153,13 +149,11 @@ struct QnnEpFactory : OrtEpFactory {
   QnnEpFactory(const OrtApi& ort_api_in,
                const OrtLogger& default_logger_in,
                const char* ep_name,
-               OrtHardwareDeviceType hw_type,
-               std::string qnn_backend_path)
+               std::unordered_map<OrtHardwareDeviceType, std::string> supported_backends)
       : ort_api{ort_api_in},
         default_logger{default_logger_in},
         ep_name{ep_name},
-        ort_hw_device_type{hw_type},
-        qnn_backend_path{std::move(qnn_backend_path)} {
+        supported_backends{std::move(supported_backends)} {
     ort_version_supported = ORT_API_VERSION;
     GetName = GetNameImpl;
     GetVendor = GetVendorImpl;
@@ -200,9 +194,8 @@ struct QnnEpFactory : OrtEpFactory {
   // Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports.
   // An EP created with this factory is expected to be able to execute a model with *all* supported
   // hardware devices at once. A single instance of QNN EP is not currently setup to partition a model among
-  // multiple different QNN backends at once (e.g, npu, cpu, gpu), so this factory instance is set to only
-  // support one backend: npu. To support a different backend, like gpu, create a different factory instance
-  // that only supports GPU.
+  // multiple different QNN backends at once (e.g, npu, cpu, gpu), so currently this factory instance is set
+  // to pick the last specified backend only.
   static OrtStatus* GetSupportedDevicesImpl(OrtEpFactory* this_ptr,
                                             const OrtHardwareDevice* const* devices,
                                             size_t num_devices,
@@ -214,14 +207,14 @@ struct QnnEpFactory : OrtEpFactory {
 
     for (size_t i = 0; i < num_devices && num_ep_devices < max_ep_devices; ++i) {
       const OrtHardwareDevice& device = *devices[i];
-      if (factory->ort_api.HardwareDevice_Type(&device) == factory->ort_hw_device_type &&
+      auto supported_backend_it = factory->supported_backends.find(factory->ort_api.HardwareDevice_Type(&device));
+      if (supported_backend_it != factory->supported_backends.end() &&
           factory->ort_api.HardwareDevice_VendorId(&device) == factory->vendor_id) {
         OrtKeyValuePairs* ep_options = nullptr;
         factory->ort_api.CreateKeyValuePairs(&ep_options);
-        factory->ort_api.AddKeyValuePair(ep_options, "backend_path", factory->qnn_backend_path.c_str());
+        factory->ort_api.AddKeyValuePair(ep_options, "backend_path", supported_backend_it->second.c_str());
         OrtStatus* status = factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options,
                                                                         &ep_devices[num_ep_devices++]);
-
         factory->ort_api.ReleaseKeyValuePairs(ep_options);
         ORT_API_RETURN_IF_ERROR(status);
       }
@@ -289,8 +282,8 @@ struct QnnEpFactory : OrtEpFactory {
 
   // Qualcomm vendor ID. Refer to the ACPI ID registry (search Qualcomm): https://uefi.org/ACPI_ID_List
   const uint32_t vendor_id{'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24)};
-  const OrtHardwareDeviceType ort_hw_device_type;  // Supported OrtHardwareDevice
-  const std::string qnn_backend_path;              // QNN backend path for OrtHardwareDevice
+  const std::unordered_map<OrtHardwareDeviceType, std::string> supported_backends;  // Supported OrtHardwareDeviceTypes
+                                                                                    // and their QNN backend paths
 };
 
 extern "C" {
@@ -302,35 +295,36 @@ OrtStatus* CreateEpFactories(const char* /*registration_name*/, const OrtApiBase
                              OrtEpFactory** factories, size_t max_factories, size_t* num_factories) {
   const OrtApi* ort_api = ort_api_base->GetApi(ORT_API_VERSION);
 
-  // Factory could use registration_name or define its own EP name.
+  std::unordered_map<OrtHardwareDeviceType, std::string> supported_backends = {
 #if defined(_WIN32)
-  std::string backend_path = "QnnHtp.dll";
+      {OrtHardwareDeviceType_NPU, "QnnHtp.dll"},
+      {OrtHardwareDeviceType_GPU, "QnnGpu.dll"},
 #else
-  std::string backend_path = "libQnnHtp.so";
+      {OrtHardwareDeviceType_NPU, "libQnnHtp.so"},
+      {OrtHardwareDeviceType_GPU, "libQnnGpu.so"},
 #endif
+  };
 
-  // Identify the path of the current dynamic library, and expect that backend_path is in the same directory.
-  onnxruntime::PathString current_path = GetDynamicLibraryLocationByAddress(reinterpret_cast<const void*>(&CreateEpFactories));
-  if (!current_path.empty()) {
-    const std::filesystem::path parent_path = std::filesystem::path{std::move(current_path)}.parent_path();
-    backend_path = (parent_path / backend_path).string();
-  }
+  for (auto& [_, backend_path] : supported_backends) {
+    // Identify the path of the current dynamic library, and expect that backend_path is in the same directory.
+    onnxruntime::PathString current_path = GetDynamicLibraryLocationByAddress(
+        reinterpret_cast<const void*>(&CreateEpFactories));
 
-  auto factory_npu = std::make_unique<QnnEpFactory>(*ort_api, *default_logger,
-                                                    onnxruntime::kQnnExecutionProvider,
-                                                    OrtHardwareDeviceType_NPU,
-                                                    std::move(backend_path));
-
-  // If want to support GPU, create a new factory instance because QNN EP is not currently setup to partition a single model
-  // among heterogeneous devices.
-  // std::unique_ptr<OrtEpFactory> factory_gpu = std::make_unique<QnnEpFactory>(*ort_api, "QNNExecutionProvider_GPU", OrtHardwareDeviceType_GPU, "gpu");
+    if (!current_path.empty()) {
+      const std::filesystem::path parent_path = std::filesystem::path{std::move(current_path)}.parent_path();
+      backend_path = (parent_path / backend_path).string();
+    }
+  }
 
   if (max_factories < 1) {
     return ort_api->CreateStatus(ORT_INVALID_ARGUMENT,
                                  "Not enough space to return EP factory. Need at least one.");
   }
 
-  factories[0] = factory_npu.release();
+  auto factory = std::make_unique<QnnEpFactory>(*ort_api, *default_logger,
+                                                onnxruntime::kQnnExecutionProvider,
+                                                supported_backends);
+  factories[0] = factory.release();
   *num_factories = 1;
 
   return nullptr;
```

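The updated `GetSupportedDevicesImpl` reports one `OrtEpDevice` per supported hardware type and resolves `backend_path` from the device type it is handed. As a caller-side illustration of the "last device wins" behavior described in the commit message (not part of this commit), here is a hedged sketch that passes both the QNN NPU and GPU devices to a session; the `GetEpDevices`, `EpDevice_Device`, and `SessionOptionsAppendExecutionProvider_V2` entry points are assumptions about the ORT C API for plugin EPs and should be checked against your headers.

```cpp
// Hypothetical caller-side sketch; everything outside this commit's diff is an assumption.
#include <vector>
#include <onnxruntime_c_api.h>

OrtStatus* AppendQnnNpuThenGpu(const OrtApi* ort_api, OrtEnv* env, OrtSessionOptions* so) {
  const OrtEpDevice* const* ep_devices = nullptr;
  size_t num_ep_devices = 0;
  // Enumerate the OrtEpDevice instances produced by registered EP factories (assumed API).
  if (OrtStatus* status = ort_api->GetEpDevices(env, &ep_devices, &num_ep_devices)) {
    return status;
  }

  // Keep the NPU and GPU devices, GPU last; filtering by EP name is omitted for brevity.
  std::vector<const OrtEpDevice*> selected;
  for (size_t i = 0; i < num_ep_devices; ++i) {
    const OrtHardwareDevice* hw = ort_api->EpDevice_Device(ep_devices[i]);  // assumed accessor
    const OrtHardwareDeviceType type = ort_api->HardwareDevice_Type(hw);
    if (type == OrtHardwareDeviceType_NPU) selected.insert(selected.begin(), ep_devices[i]);
    if (type == OrtHardwareDeviceType_GPU) selected.push_back(ep_devices[i]);
  }

  // Both devices are appended; per the commit, the last one in the list (GPU here) determines
  // which QNN backend_path the EP loads.
  return ort_api->SessionOptionsAppendExecutionProvider_V2(so, env, selected.data(), selected.size(),
                                                           /*ep_option_keys*/ nullptr,
                                                           /*ep_option_vals*/ nullptr,
                                                           /*num_ep_options*/ 0);  // assumed signature
}
```
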
onnxruntime/test/providers/qnn/qnn_basic_test.cc

Lines changed: 14 additions & 0 deletions
```diff
@@ -1436,6 +1436,20 @@ TEST_F(QnnHTPBackendTests, AutoEp_PreferNpu) {
 
   ASSERT_ORTSTATUS_OK(Ort::GetApi().UnregisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider));
 }
+
+TEST_F(QnnGPUBackendTests, AutoEp_PreferGpu) {
+  ASSERT_ORTSTATUS_OK(Ort::GetApi().RegisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider,
+                                                                     ORT_TSTR("onnxruntime_providers_qnn.dll")));
+
+  Ort::SessionOptions so;
+  so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);
+
+  const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.onnx";
+  Ort::Session session(*ort_env, ort_model_path, so);
+  EXPECT_TRUE(SessionHasEp(session, kQnnExecutionProvider));
+
+  ASSERT_ORTSTATUS_OK(Ort::GetApi().UnregisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider));
+}
 #endif  // defined(WIN32) && !BUILD_QNN_EP_STATIC_LIB
 
 // Test whether QNN EP can handle the case where the number of graph inputs and
```
