Skip to content

Commit 03a2544

Browse files
Add telemetry for logging GPU and NPU driver information in ONNX Runtime (microsoft#24734)
### Description This feature enhances telemetry by gathering and logging driver information for GPU and NPU devices detected on Windows systems. The implementation includes: - **Driver Information Collection**: - Introduced a `device_version_info` map to categorize driver information by device type (GPU or NPU). - For each detected device: - Queries the Windows Registry for driver version information using `RegQueryValueExW`. - Collects driver names (device descriptions) and versions into the appropriate device type category. - **Telemetry Logging**: - Calls `LogDriverInfoEvent` with the following parameters: - `device_class`: Indicates the device type ("GPU" or "NPU"). - `driver_names`: Comma-separated list of driver names as wide strings. - `driver_versions`: Comma-separated list of driver versions as wide strings. ### Motivation and Context Having telemetry for GPU and NPU driver information is essential for monitoring and diagnosing performance issues between driver versions. ### Performance and Testing This change was tested on Windows with a variety of sample apps, along with onnxruntime_test_all and onnxruntime_perf_test. The code adds an additional 25-30 microseconds for each NPU/GPU that gets enumerated. --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 5905ba0 commit 03a2544

File tree

5 files changed

+97
-0
lines changed

5 files changed

+97
-0
lines changed

onnxruntime/core/platform/telemetry.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,12 @@ void Telemetry::LogExecutionProviderEvent(LUID* adapterLuid) const {
8989
ORT_UNUSED_PARAMETER(adapterLuid);
9090
}
9191

92+
void Telemetry::LogDriverInfoEvent(const std::string_view device_class,
93+
const std::wstring_view& driver_names,
94+
const std::wstring_view& driver_versions) const {
95+
ORT_UNUSED_PARAMETER(device_class);
96+
ORT_UNUSED_PARAMETER(driver_names);
97+
ORT_UNUSED_PARAMETER(driver_versions);
98+
}
99+
92100
} // namespace onnxruntime

onnxruntime/core/platform/telemetry.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ class Telemetry {
6969

7070
virtual void LogExecutionProviderEvent(LUID* adapterLuid) const;
7171

72+
// Telemetry hook for reporting driver information about one class of device.
//   device_class    - label for the kind of device being reported.
//   driver_names    - driver names for that device class.
//   driver_versions - driver versions for that device class.
// Declared virtual so that a derived telemetry provider can override it.
virtual void LogDriverInfoEvent(const std::string_view device_class,
73+
const std::wstring_view& driver_names,
74+
const std::wstring_view& driver_versions) const;
75+
7276
private:
7377
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Telemetry);
7478
};

onnxruntime/core/platform/windows/device_discovery.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "core/common/cpuid_info.h"
1414
#include "core/common/logging/logging.h"
15+
#include "core/platform/env.h"
1516
#include "core/session/abi_devices.h"
1617

1718
//// For SetupApi info
@@ -56,6 +57,26 @@ struct DeviceInfo {
5657
std::unordered_map<std::wstring, std::wstring> metadata;
5758
};
5859

60+
// Accumulates driver versions and driver names as two independent
// ", "-separated lists.
struct DriverInfo {
  std::wstring driver_versions;
  std::wstring driver_names;

  // Appends one device's driver version and name to the respective lists.
  // An empty value is skipped entirely (no separator is emitted for it), so the
  // two lists are not guaranteed to hold the same number of entries.
  void AddDevice(const std::wstring& driver_version, const std::wstring& driver_name) {
    const auto append_item = [](std::wstring& joined, const std::wstring& item) {
      if (item.empty()) {
        return;
      }
      if (!joined.empty()) {
        joined.append(L", ");
      }
      joined.append(item);
    };

    append_item(driver_versions, driver_version);
    append_item(driver_names, driver_name);
  }
};
79+
5980
// Packs a vendor id / device id pair into a single 64-bit key:
// the vendor id occupies the upper 32 bits and the device id the lower 32 bits.
uint64_t GetDeviceKey(uint32_t vendor_id, uint32_t device_id) {
  const uint64_t upper_half = static_cast<uint64_t>(vendor_id) << 32;
  const uint64_t lower_half = static_cast<uint64_t>(device_id);
  return upper_half | lower_half;
}
@@ -91,6 +112,8 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
91112
const GUID local_DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU = {0xd46140c4, 0xadd7, 0x451b, 0x9e, 0x56, 0x6, 0xfe, 0x8c, 0x3b, 0x58, 0xed};
92113
const GUID local_GUID_DEVCLASS_COMPUTEACCELERATOR = {0xf01a9d53, 0x3ff6, 0x48d2, 0x9f, 0x97, 0xc8, 0xa7, 0x00, 0x4b, 0xe1, 0x0c};
93114

115+
std::unordered_map<OrtHardwareDeviceType, DriverInfo> device_version_info;
116+
94117
std::array<GUID, 3> guids = {
95118
GUID_DEVCLASS_DISPLAY,
96119
GUID_DEVCLASS_PROCESSOR,
@@ -232,11 +255,52 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
232255
entry->vendor = std::wstring(buffer, wcslen(buffer));
233256
}
234257
}
258+
259+
// Generate telemetry event to log the GPU and NPU driver name and version.
260+
if (entry->type == OrtHardwareDeviceType_CPU) {
261+
// Skip processor entries for telemetry.
262+
continue;
263+
}
264+
265+
// Open the device's driver registry key
266+
HKEY dev_reg_key = SetupDiOpenDevRegKey(devInfo, &devData,
267+
DICS_FLAG_GLOBAL,
268+
0,
269+
DIREG_DRV,
270+
KEY_READ);
271+
272+
if (dev_reg_key != INVALID_HANDLE_VALUE) {
273+
// Query the "DriverVersion" string
274+
std::wstring driver_version_str;
275+
wchar_t driver_version[256];
276+
DWORD str_size = sizeof(driver_version);
277+
DWORD type = 0;
278+
if (RegQueryValueExW(dev_reg_key, L"DriverVersion",
279+
nullptr, &type,
280+
reinterpret_cast<LPBYTE>(driver_version),
281+
&str_size) == ERROR_SUCCESS &&
282+
type == REG_SZ) {
283+
// Ensure proper null termination of a string retrieved from the Windows Registry API.
284+
driver_version[(str_size / sizeof(wchar_t)) - 1] = 0;
285+
driver_version_str = driver_version;
286+
}
287+
RegCloseKey(dev_reg_key);
288+
device_version_info[entry->type].AddDevice(driver_version_str, entry->description);
289+
}
235290
}
236291

237292
SetupDiDestroyDeviceInfoList(devInfo);
238293
}
239294

295+
// Log driver information for GPUs and NPUs
296+
const Env& env = Env::Default();
297+
for (const auto& [type, info] : device_version_info) {
298+
if (!info.driver_versions.empty() || !info.driver_names.empty()) {
299+
const std::string_view driver_class = (type == OrtHardwareDeviceType_GPU) ? "GPU" : "NPU";
300+
env.GetTelemetryProvider().LogDriverInfoEvent(driver_class, info.driver_names, info.driver_versions);
301+
}
302+
}
303+
240304
return device_info;
241305
}
242306

onnxruntime/core/platform/windows/telemetry.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,4 +385,21 @@ void WindowsTelemetry::LogExecutionProviderEvent(LUID* adapterLuid) const {
385385
TraceLoggingUInt32(adapterLuid->HighPart, "adapterLuidHighPart"));
386386
}
387387

388+
void WindowsTelemetry::LogDriverInfoEvent(const std::string_view device_class, const std::wstring_view& driver_names, const std::wstring_view& driver_versions) const {
389+
if (global_register_count_ == 0 || enabled_ == false)
390+
return;
391+
392+
TraceLoggingWrite(telemetry_provider_handle,
393+
"DriverInfo",
394+
TraceLoggingBool(true, "UTCReplace_AppSessionGuid"),
395+
TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage),
396+
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
397+
TraceLoggingLevel(WINEVENT_LEVEL_INFO),
398+
// Telemetry info
399+
TraceLoggingUInt8(0, "schemaVersion"),
400+
TraceLoggingString(device_class.data(), "deviceClass"),
401+
TraceLoggingWideString(driver_names.data(), "driverNames"),
402+
TraceLoggingWideString(driver_versions.data(), "driverVersions"));
403+
}
404+
388405
} // namespace onnxruntime

onnxruntime/core/platform/windows/telemetry.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ class WindowsTelemetry : public Telemetry {
6060

6161
void LogExecutionProviderEvent(LUID* adapterLuid) const override;
6262

63+
// Windows override of the driver-info telemetry hook.
//   device_class    - label for the kind of device being reported.
//   driver_names    - driver names for that device class.
//   driver_versions - driver versions for that device class.
void LogDriverInfoEvent(const std::string_view device_class,
64+
const std::wstring_view& driver_names,
65+
const std::wstring_view& driver_versions) const override;
66+
6367
using EtwInternalCallback = std::function<void(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level,
6468
ULONGLONG MatchAnyKeyword, ULONGLONG MatchAllKeyword,
6569
PEVENT_FILTER_DESCRIPTOR FilterData, PVOID CallbackContext)>;

0 commit comments

Comments
 (0)