Skip to content

Commit 4a55e2a

Browse files
committed
refactor: gpustack windows temporary support nvidia
1 parent 1a4afa8 commit 4a55e2a

File tree

4 files changed

+95
-207
lines changed

4 files changed

+95
-207
lines changed

src/detection/gpu/gpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#define FF_GPU_CORE_UTILIZATION_RATE_UNSET -1
1010
#define FF_GPU_INDEX_UNSET ((uint8_t)-1)
1111
#define FF_GPU_BUS_UNSET ((uint32_t)-1)
12+
#define FF_GPU_COUNT_UNSET ((uint32_t)-1)
1213

1314
extern const char* FF_GPU_VENDOR_NAME_APPLE;
1415
extern const char* FF_GPU_VENDOR_NAME_AMD;

src/detection/gpu/gpu_driver_specific.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ typedef enum FFGpuDriverConditionType
77
FF_GPU_DRIVER_CONDITION_TYPE_BUS_ID = 1 << 0,
88
FF_GPU_DRIVER_CONDITION_TYPE_DEVICE_ID = 1 << 1,
99
FF_GPU_DRIVER_CONDITION_TYPE_LUID = 1 << 2,
10+
FF_GPU_DRIVER_CONDITION_TYPE_INDEX = 1 << 3,
1011
} FFGpuDriverConditionType;
1112

1213
typedef struct FFGpuDriverPciBusId
@@ -33,6 +34,7 @@ typedef struct FFGpuDriverCondition
3334
FFGpuDriverPciBusId pciBusId;
3435
FFGpuDriverPciDeviceId pciDeviceId;
3536
uint64_t luid;
37+
int32_t index;
3638
} FFGpuDriverCondition;
3739

3840
// detect x if not NULL
@@ -50,5 +52,6 @@ typedef struct FFGpuDriverResult
5052
} FFGpuDriverResult;
5153

5254
const char* ffDetectNvidiaGpuInfo(const FFGpuDriverCondition* cond, FFGpuDriverResult result, const char* soName);
55+
const char* ffDetectNvidiaGpuCount(uint32_t* result, const char* soName);
5356
const char* ffDetectIntelGpuInfo(const FFGpuDriverCondition* cond, FFGpuDriverResult result, const char* soName);
5457
const char* ffDetectAmdGpuInfo(const FFGpuDriverCondition* cond, FFGpuDriverResult result, const char* soName);

src/detection/gpu/gpu_nvidia.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,57 @@ struct FFNvmlData {
2222
bool inited;
2323
} nvmlData;
2424

25+
const char* ffDetectNvidiaGpuCount(uint32_t* result, const char* soName)
26+
{
27+
#ifndef FF_DISABLE_DLOPEN
28+
29+
if (!nvmlData.inited)
30+
{
31+
nvmlData.inited = true;
32+
FF_LIBRARY_LOAD(libnvml, NULL, "dlopen nvml failed", soName , 1);
33+
FF_LIBRARY_LOAD_SYMBOL_MESSAGE(libnvml, nvmlInit_v2)
34+
FF_LIBRARY_LOAD_SYMBOL_MESSAGE(libnvml, nvmlShutdown)
35+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetCount_v2)
36+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetHandleByIndex_v2)
37+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetHandleByPciBusId_v2)
38+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetPciInfo_v3)
39+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetTemperature)
40+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetMemoryInfo_v2)
41+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetMemoryInfo)
42+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetNumGpuCores)
43+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetMaxClockInfo)
44+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetBrand)
45+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetUtilizationRates)
46+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetUUID)
47+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetIndex)
48+
FF_LIBRARY_LOAD_SYMBOL_VAR_MESSAGE(libnvml, nvmlData, nvmlDeviceGetName)
49+
50+
if (ffnvmlInit_v2() != NVML_SUCCESS)
51+
{
52+
nvmlData.ffnvmlDeviceGetNumGpuCores = NULL;
53+
return "nvmlInit_v2() failed";
54+
}
55+
atexit((void*) ffnvmlShutdown);
56+
libnvml = NULL; // don't close nvml
57+
}
58+
59+
if (nvmlData.ffnvmlDeviceGetNumGpuCores == NULL)
60+
return "loading nvml library failed";
61+
62+
uint32_t count;
63+
if (nvmlData.ffnvmlDeviceGetCount_v2(&count) != NVML_SUCCESS)
64+
return "nvmlDeviceGetCount_v2() failed";
65+
66+
*result = count;
67+
return NULL;
68+
#else
69+
70+
FF_UNUSED(cond, result, soName);
71+
return "dlopen is disabled";
72+
73+
#endif
74+
}
75+
2576
const char* ffDetectNvidiaGpuInfo(const FFGpuDriverCondition* cond, FFGpuDriverResult result, const char* soName)
2677
{
2778
#ifndef FF_DISABLE_DLOPEN
@@ -92,6 +143,15 @@ const char* ffDetectNvidiaGpuInfo(const FFGpuDriverCondition* cond, FFGpuDriverR
92143
}
93144
if (!device) return "Device not found";
94145
}
146+
else if (cond->type & FF_GPU_DRIVER_CONDITION_TYPE_INDEX)
147+
{
148+
if (nvmlData.ffnvmlDeviceGetHandleByIndex_v2((unsigned int)cond->index, &device) != NVML_SUCCESS)
149+
return "nvmlDeviceGetHandleByIndex_v2() failed";
150+
}
151+
else
152+
{
153+
return "Unknown condition type";
154+
}
95155

96156
nvmlBrandType_t brand;
97157
if (nvmlData.ffnvmlDeviceGetBrand(device, &brand) == NVML_SUCCESS)

src/detection/gpu/gpu_windows.c

Lines changed: 31 additions & 207 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,6 @@
55

66
#include <inttypes.h>
77

8-
static int isGpuNameEqual(const FFGPUResult* gpu, const FFstrbuf* name)
9-
{
10-
return ffStrbufEqual(&gpu->name, name);
11-
}
12-
13-
static int isDeviceIdEqual(const FFWindowGPUPci* pci, const FFstrbuf* deviceId)
14-
{
15-
return ffStrbufEqual(&pci->deviceId, deviceId);
16-
}
17-
18-
static int isBusEqual(const uint32_t* bus, uint32_t* bus2)
19-
{
20-
return *bus == *bus2;
21-
}
22-
238
static inline bool getDriverSpecificDetectionFn(const char* vendor, __typeof__(&ffDetectNvidiaGpuInfo)* pDetectFn, const char** pDllName)
249
{
2510
if (vendor == FF_GPU_VENDOR_NAME_NVIDIA)
@@ -57,203 +42,41 @@ static inline bool getDriverSpecificDetectionFn(const char* vendor, __typeof__(&
5742

5843
const char* ffDetectGPUImpl(FF_MAYBE_UNUSED const FFGPUOptions* options, FFlist* gpus)
5944
{
60-
DISPLAY_DEVICEW displayDevice = { .cb = sizeof(displayDevice) };
61-
wchar_t regDirectxKey[MAX_PATH] = L"SOFTWARE\\Microsoft\\DirectX\\{";
62-
const uint32_t regDirectxKeyPrefixLength = (uint32_t) wcslen(regDirectxKey);
63-
wchar_t regControlVideoKey[MAX_PATH] = L"SYSTEM\\CurrentControlSet\\Control\\Video\\{";
64-
const uint32_t regControlVideoKeyPrefixLength = (uint32_t) wcslen(regControlVideoKey);
65-
const uint32_t deviceKeyPrefixLength = strlen("\\Registry\\Machine\\") + regControlVideoKeyPrefixLength;
66-
67-
FF_LIST_AUTO_DESTROY pcis = ffListCreate(sizeof (FFWindowGPUPci));
68-
69-
for (DWORD i = 0; EnumDisplayDevicesW(NULL, i, &displayDevice, 0); ++i)
70-
{
71-
if (displayDevice.StateFlags & DISPLAY_DEVICE_MIRRORING_DRIVER) continue;
72-
73-
const uint32_t deviceKeyLength = (uint32_t) wcslen(displayDevice.DeviceKey);
74-
if (__builtin_expect(deviceKeyLength == 100, true))
75-
{
76-
if (wmemcmp(&displayDevice.DeviceKey[deviceKeyLength - 4], L"0000", 4) != 0) continue;
77-
}
78-
else
79-
{
80-
// DeviceKey can be empty. See #484
81-
FF_STRBUF_AUTO_DESTROY gpuName = ffStrbufCreateWS(displayDevice.DeviceString);
82-
if (ffListContains(gpus, &gpuName, (void*) isGpuNameEqual)) continue;
83-
}
84-
85-
// See: https://download.nvidia.com/XFree86/Linux-x86_64/545.23.06/README/supportedchips.html
86-
// displayDevice.DeviceID = MatchingDeviceId "PCI\\VEN_10DE&DEV_2782&SUBSYS_513417AA&REV_A1"
87-
unsigned vendorId = 0, deviceId = 0, subSystemId = 0, revId = 0;
88-
swscanf(displayDevice.DeviceID, L"PCI\\VEN_%x&DEV_%x&SUBSYS_%x&REV_%x", &vendorId, &deviceId, &subSystemId, &revId);
89-
90-
wchar_t regEnumPciKey[MAX_PATH] = L"SYSTEM\\CurrentControlSet\\Enum\\";
91-
const uint32_t regEnumPciKeyPrefixLength = (uint32_t) wcslen(regEnumPciKey);
92-
9345

94-
FF_STRBUF_AUTO_DESTROY pciStrBuf = ffStrbufCreateWS(displayDevice.DeviceID);
95-
size_t deviceIDLength = wcslen(displayDevice.DeviceID);
96-
97-
if (regEnumPciKeyPrefixLength + deviceIDLength >= MAX_PATH) {
98-
continue;
99-
}
100-
wmemcpy(regEnumPciKey + regEnumPciKeyPrefixLength, displayDevice.DeviceID, deviceIDLength);
101-
102-
uint32_t pciBusId = FF_GPU_BUS_UNSET;
103-
uint32_t pciIndex = 0;
104-
pciIndex = ffListFirstIndexComp(&pcis, &pciStrBuf, (void*) isDeviceIdEqual);
105-
if (pciIndex != pcis.length && pcis.length > 0) {
106-
FFWindowGPUPci* gpuPci = (FFWindowGPUPci*)ffListGet(&pcis, pciIndex);
107-
for (uint32_t i = 0; i < gpuPci->busNumber; ++i)
46+
// Temporary: only NVIDIA GPUs are supported for now (detected through NVML).
47+
const char* dllName = "nvml.dll";
48+
49+
uint32_t gpuCount = FF_GPU_COUNT_UNSET;
50+
if (!ffDetectNvidiaGpuCount(&gpuCount, dllName)){
51+
52+
for (uint32_t i = 0; i < gpuCount; ++i){
53+
54+
FFGPUResult* gpu = (FFGPUResult*)ffListAdd(gpus);
55+
ffStrbufInitStatic(&gpu->vendor, FF_GPU_VENDOR_NAME_NVIDIA);
56+
ffStrbufInitStatic(&gpu->platformApi, "NVML");
57+
58+
ffStrbufInit(&gpu->name);
59+
ffStrbufInit(&gpu->uuid);
60+
ffStrbufInit(&gpu->driver);
61+
62+
gpu->index = FF_GPU_INDEX_UNSET;
63+
gpu->temperature = FF_GPU_TEMP_UNSET;
64+
gpu->coreCount = FF_GPU_CORE_COUNT_UNSET;
65+
gpu->type = FF_GPU_TYPE_UNKNOWN;
66+
gpu->dedicated.total = gpu->dedicated.used = gpu->shared.total = gpu->shared.used = FF_GPU_VMEM_SIZE_UNSET;
67+
gpu->deviceId = 0;
68+
gpu->frequency = FF_GPU_FREQUENCY_UNSET;
69+
gpu->coreUtilizationRate = FF_GPU_CORE_UTILIZATION_RATE_UNSET;
70+
71+
__typeof__(&ffDetectNvidiaGpuInfo) detectFn;
72+
73+
if (getDriverSpecificDetectionFn(gpu->vendor.chars, &detectFn, &dllName) && (options->temp || options->driverSpecific))
10874
{
109-
uint32_t* currentPciBusId = ffListGet(&gpuPci->buses, i);
110-
if (!ffListContains(&gpuPci->usedBuses, currentPciBusId, (void*) isBusEqual)) {
111-
pciBusId = *currentPciBusId;
112-
*(uint32_t*) ffListAdd(&gpuPci->usedBuses) = pciBusId;
113-
break;
114-
}
115-
}
116-
117-
} else {
118-
FFWindowGPUPci* gpuPci = (FFWindowGPUPci*)ffListAdd(&pcis);
119-
gpuPci->deviceId = pciStrBuf;
120-
ffListInit(&gpuPci->buses, sizeof(uint32_t));
121-
ffListInit(&gpuPci->usedBuses, sizeof(uint32_t));
122-
123-
FF_HKEY_AUTO_DESTROY hKey = NULL;
124-
if(ffRegOpenKeyForRead(HKEY_LOCAL_MACHINE, regEnumPciKey, &hKey, NULL))
125-
{
126-
if (!ffRegGetNSubKeys(hKey, &gpuPci->busNumber, NULL))
127-
{
128-
continue;
129-
}
130-
13175

132-
for (uint32_t i = 0; i < gpuPci->busNumber; ++i)
133-
{
134-
FF_STRBUF_AUTO_DESTROY subKey = ffStrbufCreate();
135-
if (!ffRegGetSubKey(hKey, i, &subKey, NULL)) {
136-
continue;
137-
}
138-
139-
wchar_t* widePciDeviceKey = ffStrbufToWideChar(&subKey);
140-
if (widePciDeviceKey == NULL) {
141-
continue;
142-
}
143-
144-
FF_HKEY_AUTO_DESTROY pciDevicHKey = NULL;
145-
if (!ffRegOpenKeyForRead(hKey, widePciDeviceKey, &pciDevicHKey, NULL))
146-
{
147-
free(widePciDeviceKey);
148-
continue;
149-
}
150-
free(widePciDeviceKey);
151-
152-
// LocationInformation example: @System32\\drivers\\pci.sys,#65536;PCI bus %1, device %2, function %3;(114,0,0)
153-
FF_STRBUF_AUTO_DESTROY locationInformation = ffStrbufCreate();
154-
if (!ffRegReadStrbuf(pciDevicHKey, L"LocationInformation", &locationInformation, NULL)){
155-
continue;
156-
}
157-
158-
int busId = -1;
159-
if (sscanf(locationInformation.chars, "@System32\\drivers\\pci.sys,#65536;PCI bus %%1, device %%2, function %%3;(%d", &busId) == 1) {
160-
*(uint32_t*) ffListAdd(&gpuPci->buses) = (uint32_t)busId;
161-
}
162-
}
163-
164-
if (gpuPci->buses.length > 0) {
165-
uint32_t* busId = ffListGet(&gpuPci->buses, 0);
166-
pciBusId = *busId;
167-
*(uint32_t*) ffListAdd(&gpuPci->usedBuses) = *busId;
168-
}
169-
}
170-
}
171-
172-
FFGPUResult* gpu = (FFGPUResult*)ffListAdd(gpus);
173-
ffStrbufInitStatic(&gpu->vendor, ffGetGPUVendorString(vendorId));
174-
ffStrbufInitWS(&gpu->name, displayDevice.DeviceString);
175-
ffStrbufInit(&gpu->uuid);
176-
ffStrbufInit(&gpu->driver);
177-
ffStrbufInitStatic(&gpu->platformApi, "Direct3D");
178-
gpu->index = FF_GPU_INDEX_UNSET;
179-
gpu->temperature = FF_GPU_TEMP_UNSET;
180-
gpu->coreCount = FF_GPU_CORE_COUNT_UNSET;
181-
gpu->type = FF_GPU_TYPE_UNKNOWN;
182-
gpu->dedicated.total = gpu->dedicated.used = gpu->shared.total = gpu->shared.used = FF_GPU_VMEM_SIZE_UNSET;
183-
gpu->deviceId = 0;
184-
gpu->frequency = FF_GPU_FREQUENCY_UNSET;
185-
gpu->coreUtilizationRate = FF_GPU_CORE_UTILIZATION_RATE_UNSET;
186-
187-
if (deviceKeyLength == 100 && displayDevice.DeviceKey[deviceKeyPrefixLength - 1] == '{')
188-
{
189-
wmemcpy(regControlVideoKey + regControlVideoKeyPrefixLength, displayDevice.DeviceKey + deviceKeyPrefixLength, strlen("00000000-0000-0000-0000-000000000000}\\0000"));
190-
FF_HKEY_AUTO_DESTROY hKey = NULL;
191-
if (!ffRegOpenKeyForRead(HKEY_LOCAL_MACHINE, regControlVideoKey, &hKey, NULL)) continue;
192-
193-
ffRegReadStrbuf(hKey, L"DriverVersion", &gpu->driver, NULL);
194-
195-
wmemcpy(regDirectxKey + regDirectxKeyPrefixLength, displayDevice.DeviceKey + deviceKeyPrefixLength, strlen("00000000-0000-0000-0000-000000000000}"));
196-
FF_HKEY_AUTO_DESTROY hDirectxKey = NULL;
197-
if (ffRegOpenKeyForRead(HKEY_LOCAL_MACHINE, regDirectxKey, &hDirectxKey, NULL))
198-
{
199-
uint64_t dedicatedVideoMemory = 0;
200-
if(ffRegReadUint64(hDirectxKey, L"DedicatedVideoMemory", &dedicatedVideoMemory, NULL))
201-
gpu->type = dedicatedVideoMemory >= 1024 * 1024 * 1024 ? FF_GPU_TYPE_DISCRETE : FF_GPU_TYPE_INTEGRATED;
202-
203-
uint64_t dedicatedSystemMemory, sharedSystemMemory;
204-
if(ffRegReadUint64(hDirectxKey, L"DedicatedSystemMemory", &dedicatedSystemMemory, NULL) &&
205-
ffRegReadUint64(hDirectxKey, L"SharedSystemMemory", &sharedSystemMemory, NULL))
206-
{
207-
gpu->dedicated.total = dedicatedVideoMemory + dedicatedSystemMemory;
208-
gpu->shared.total = sharedSystemMemory;
209-
}
210-
211-
ffRegReadUint64(hDirectxKey, L"AdapterLuid", &gpu->deviceId, NULL);
212-
213-
uint32_t featureLevel = 0;
214-
if(ffRegReadUint(hDirectxKey, L"MaxD3D12FeatureLevel", &featureLevel, NULL) && featureLevel)
215-
ffStrbufSetF(&gpu->platformApi, "Direct3D 12.%u", (featureLevel & 0x0F00) >> 8);
216-
else if(ffRegReadUint(hDirectxKey, L"MaxD3D11FeatureLevel", &featureLevel, NULL) && featureLevel)
217-
ffStrbufSetF(&gpu->platformApi, "Direct3D 11.%u", (featureLevel & 0x0F00) >> 8);
218-
}
219-
else if (!ffRegReadUint64(hKey, L"HardwareInformation.qwMemorySize", &gpu->dedicated.total, NULL))
220-
{
221-
uint32_t vmem = 0;
222-
if (ffRegReadUint(hKey, L"HardwareInformation.MemorySize", &vmem, NULL))
223-
gpu->dedicated.total = vmem;
224-
gpu->type = gpu->dedicated.total > 1024 * 1024 * 1024 ? FF_GPU_TYPE_DISCRETE : FF_GPU_TYPE_INTEGRATED;
225-
}
226-
227-
if (gpu->vendor.length == 0)
228-
{
229-
ffRegReadStrbuf(hKey, L"ProviderName", &gpu->vendor, NULL);
230-
if (ffStrbufContainS(&gpu->vendor, "Intel"))
231-
ffStrbufSetStatic(&gpu->vendor, FF_GPU_VENDOR_NAME_INTEL);
232-
else if (ffStrbufContainS(&gpu->vendor, "NVIDIA"))
233-
ffStrbufSetStatic(&gpu->vendor, FF_GPU_VENDOR_NAME_NVIDIA);
234-
else if (ffStrbufContainS(&gpu->vendor, "AMD") || ffStrbufContainS(&gpu->vendor, "ATI"))
235-
ffStrbufSetStatic(&gpu->vendor, FF_GPU_VENDOR_NAME_AMD);
236-
}
237-
}
238-
239-
__typeof__(&ffDetectNvidiaGpuInfo) detectFn;
240-
const char* dllName;
241-
242-
if (getDriverSpecificDetectionFn(gpu->vendor.chars, &detectFn, &dllName) && (options->temp || options->driverSpecific))
243-
{
244-
if (vendorId && deviceId && subSystemId)
245-
{
24676
detectFn(
24777
&(FFGpuDriverCondition){
248-
.type = FF_GPU_DRIVER_CONDITION_TYPE_DEVICE_ID | FF_GPU_DRIVER_CONDITION_TYPE_LUID,
249-
.pciDeviceId = {
250-
.deviceId = deviceId,
251-
.vendorId = vendorId,
252-
.subSystemId = subSystemId,
253-
.revId = revId,
254-
.bus = pciBusId,
255-
},
256-
.luid = gpu->deviceId,
78+
.type = FF_GPU_DRIVER_CONDITION_TYPE_INDEX,
79+
.index = (int32_t)i,
25780
},
25881
(FFGpuDriverResult){
25982
.index = &gpu->index,
@@ -264,6 +87,7 @@ const char* ffDetectGPUImpl(FF_MAYBE_UNUSED const FFGPUOptions* options, FFlist*
26487
.frequency = options->driverSpecific ? &gpu->frequency : NULL,
26588
.coreUtilizationRate = &gpu->coreUtilizationRate,
26689
.uuid = &gpu->uuid,
90+
.name = &gpu->name,
26791
},
26892
dllName);
26993
}

0 commit comments

Comments
 (0)