Skip to content

Commit 11640a6

Browse files
authored
fix: rename VGPU functions to Accel for consistency across platforms (#562)
- Updated function names from VGPUInit, VGPUShutdown, and related functions to AccelInit, AccelShutdown, and their variants in both Unix and Windows implementations.
- Adjusted error messages to reflect the new function names.
- Updated corresponding tests and example implementations to use the new function names for better clarity and consistency.
1 parent bd2643e commit 11640a6

File tree

6 files changed

+249
-233
lines changed

6 files changed

+249
-233
lines changed

pkg/hypervisor/device/accelerator.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,8 @@ type SnapshotContext struct {
225225
var (
226226
libHandle uintptr
227227
// DeviceInfo APIs
228-
vgpuInit func() Result
229-
vgpuShutdown func() Result
228+
accelInit func() Result
229+
accelShutdown func() Result
230230
getDeviceCount func(*uintptr) Result
231231
getAllDevices func(*ExtendedDeviceInfo, uintptr, *uintptr) Result
232232
getAllDevicesTopology func(*ExtendedDeviceTopology) Result
@@ -286,7 +286,7 @@ func (a *AcceleratorInterface) Close() error {
286286
_ = r // ignore recovery value
287287
}
288288
}()
289-
vgpuShutdown()
289+
accelShutdown()
290290
if registerLogCallback != nil {
291291
registerLogCallback(0)
292292
}

pkg/hypervisor/device/accelerator_unix.go

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,34 +40,34 @@ func (a *AcceleratorInterface) Load() error {
4040
libHandle = handle
4141

4242
// Register all required functions - names must match C header exactly
43-
purego.RegisterLibFunc(&vgpuInit, handle, "VGPUInit")
44-
purego.RegisterLibFunc(&vgpuShutdown, handle, "VGPUShutdown")
45-
purego.RegisterLibFunc(&getDeviceCount, handle, "GetDeviceCount")
46-
purego.RegisterLibFunc(&getAllDevices, handle, "GetAllDevices")
47-
purego.RegisterLibFunc(&getAllDevicesTopology, handle, "GetAllDevicesTopology")
48-
purego.RegisterLibFunc(&assignPartition, handle, "AssignPartition")
49-
purego.RegisterLibFunc(&removePartition, handle, "RemovePartition")
50-
purego.RegisterLibFunc(&setMemHardLimit, handle, "SetMemHardLimit")
51-
purego.RegisterLibFunc(&setComputeUnitHardLimit, handle, "SetComputeUnitHardLimit")
52-
purego.RegisterLibFunc(&snapshot, handle, "Snapshot")
53-
purego.RegisterLibFunc(&resume, handle, "Resume")
54-
purego.RegisterLibFunc(&getProcessInformation, handle, "GetProcessInformation")
55-
purego.RegisterLibFunc(&getDeviceMetrics, handle, "GetDeviceMetrics")
56-
purego.RegisterLibFunc(&getVendorMountLibs, handle, "GetVendorMountLibs")
43+
purego.RegisterLibFunc(&accelInit, handle, "AccelInit")
44+
purego.RegisterLibFunc(&accelShutdown, handle, "AccelShutdown")
45+
purego.RegisterLibFunc(&getDeviceCount, handle, "AccelGetDeviceCount")
46+
purego.RegisterLibFunc(&getAllDevices, handle, "AccelGetAllDevices")
47+
purego.RegisterLibFunc(&getAllDevicesTopology, handle, "AccelGetAllDevicesTopology")
48+
purego.RegisterLibFunc(&assignPartition, handle, "AccelAssignPartition")
49+
purego.RegisterLibFunc(&removePartition, handle, "AccelRemovePartition")
50+
purego.RegisterLibFunc(&setMemHardLimit, handle, "AccelSetMemHardLimit")
51+
purego.RegisterLibFunc(&setComputeUnitHardLimit, handle, "AccelSetComputeUnitHardLimit")
52+
purego.RegisterLibFunc(&snapshot, handle, "AccelSnapshot")
53+
purego.RegisterLibFunc(&resume, handle, "AccelResume")
54+
purego.RegisterLibFunc(&getProcessInformation, handle, "AccelGetProcessInformation")
55+
purego.RegisterLibFunc(&getDeviceMetrics, handle, "AccelGetDeviceMetrics")
56+
purego.RegisterLibFunc(&getVendorMountLibs, handle, "AccelGetVendorMountLibs")
5757

5858
// Register log callback only on non-macOS platforms
5959
// purego callback has issues on macOS ARM64, causing bus errors when C code calls back into Go
6060
if runtime.GOOS != "darwin" {
61-
purego.RegisterLibFunc(&registerLogCallback, handle, "RegisterLogCallback")
61+
purego.RegisterLibFunc(&registerLogCallback, handle, "AccelRegisterLogCallback")
6262
callback := purego.NewCallback(goLogCallback)
6363
if result := registerLogCallback(callback); result != ResultSuccess {
6464
klog.Warningf("Failed to register log callback: %d", result)
6565
}
6666
}
6767

68-
result := vgpuInit()
68+
result := accelInit()
6969
if result != ResultSuccess {
70-
return fmt.Errorf("failed to initialize VGPU: %d", result)
70+
return fmt.Errorf("failed to initialize accelerator: %d", result)
7171
}
7272

7373
a.loaded = true

pkg/hypervisor/device/accelerator_windows.go

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,27 +39,27 @@ func (a *AcceleratorInterface) Load() error {
3939
libHandle = uintptr(dll.Handle)
4040

4141
// purego.RegisterLibFunc works with Windows DLL handles - names must match C header exactly
42-
purego.RegisterLibFunc(&vgpuInit, libHandle, "VGPUInit")
43-
purego.RegisterLibFunc(&vgpuShutdown, libHandle, "VGPUShutdown")
44-
purego.RegisterLibFunc(&getDeviceCount, libHandle, "GetDeviceCount")
45-
purego.RegisterLibFunc(&getAllDevices, libHandle, "GetAllDevices")
46-
purego.RegisterLibFunc(&getAllDevicesTopology, libHandle, "GetAllDevicesTopology")
47-
purego.RegisterLibFunc(&assignPartition, libHandle, "AssignPartition")
48-
purego.RegisterLibFunc(&removePartition, libHandle, "RemovePartition")
49-
purego.RegisterLibFunc(&setMemHardLimit, libHandle, "SetMemHardLimit")
50-
purego.RegisterLibFunc(&setComputeUnitHardLimit, libHandle, "SetComputeUnitHardLimit")
51-
purego.RegisterLibFunc(&snapshot, libHandle, "Snapshot")
52-
purego.RegisterLibFunc(&resume, libHandle, "Resume")
53-
purego.RegisterLibFunc(&getProcessInformation, libHandle, "GetProcessInformation")
54-
purego.RegisterLibFunc(&getDeviceMetrics, libHandle, "GetDeviceMetrics")
55-
purego.RegisterLibFunc(&getVendorMountLibs, libHandle, "GetVendorMountLibs")
42+
purego.RegisterLibFunc(&accelInit, libHandle, "AccelInit")
43+
purego.RegisterLibFunc(&accelShutdown, libHandle, "AccelShutdown")
44+
purego.RegisterLibFunc(&getDeviceCount, libHandle, "AccelGetDeviceCount")
45+
purego.RegisterLibFunc(&getAllDevices, libHandle, "AccelGetAllDevices")
46+
purego.RegisterLibFunc(&getAllDevicesTopology, libHandle, "AccelGetAllDevicesTopology")
47+
purego.RegisterLibFunc(&assignPartition, libHandle, "AccelAssignPartition")
48+
purego.RegisterLibFunc(&removePartition, libHandle, "AccelRemovePartition")
49+
purego.RegisterLibFunc(&setMemHardLimit, libHandle, "AccelSetMemHardLimit")
50+
purego.RegisterLibFunc(&setComputeUnitHardLimit, libHandle, "AccelSetComputeUnitHardLimit")
51+
purego.RegisterLibFunc(&snapshot, libHandle, "AccelSnapshot")
52+
purego.RegisterLibFunc(&resume, libHandle, "AccelResume")
53+
purego.RegisterLibFunc(&getProcessInformation, libHandle, "AccelGetProcessInformation")
54+
purego.RegisterLibFunc(&getDeviceMetrics, libHandle, "AccelGetDeviceMetrics")
55+
purego.RegisterLibFunc(&getVendorMountLibs, libHandle, "AccelGetVendorMountLibs")
5656

5757
// Note: Log callback is not registered on Windows due to calling convention differences
5858
// Windows uses stdcall/cdecl while Unix uses System V ABI
5959

60-
result := vgpuInit()
60+
result := accelInit()
6161
if result != ResultSuccess {
62-
return fmt.Errorf("failed to initialize VGPU: %d", result)
62+
return fmt.Errorf("failed to initialize accelerator: %d", result)
6363
}
6464

6565
a.loaded = true

provider/accelerator.h

Lines changed: 53 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,23 @@
2222
#include <stdint.h>
2323
#include <sys/types.h>
2424

25+
#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__MINGW64__) && !defined(__CYGWIN__)
26+
typedef int pid_t;
27+
#endif
28+
2529
#ifdef __cplusplus
2630
extern "C" {
2731
#endif
2832

33+
#if defined(_WIN32)
34+
#if defined(ACCELERATOR_EXPORTS)
35+
#define ACCELERATOR_API __declspec(dllexport)
36+
#else
37+
#define ACCELERATOR_API __declspec(dllimport)
38+
#endif
39+
#else
2940
#define ACCELERATOR_API __attribute__((visibility("default")))
41+
#endif
3042

3143
// ============================================================================
3244
// Common Types
@@ -119,39 +131,39 @@
119131

120132
// Topology level type (GPU-to-GPU connection type)
121133
typedef enum {
122-
TOPO_LEVEL_INTERNAL = 0, // e.g. Tesla K80 (same board)
123-
TOPO_LEVEL_SINGLE_SWITCH = 1, // single PCIe switch
124-
TOPO_LEVEL_MULTI_SWITCH = 2, // multiple PCIe switches (no host bridge traversal)
125-
TOPO_LEVEL_HOST_BRIDGE = 3, // same host bridge
126-
TOPO_LEVEL_NUMA_NODE = 4, // same NUMA node
127-
TOPO_LEVEL_SYSTEM = 5, // cross NUMA (system level)
128-
TOPO_LEVEL_SELF = 6, // same device
129-
TOPO_LEVEL_UNKNOWN = 7 // unknown or error
134+
TOPO_LEVEL_INTERNAL = 0, // e.g. Tesla K80 (same board)
135+
TOPO_LEVEL_SINGLE_SWITCH = 1, // single PCIe switch
136+
TOPO_LEVEL_MULTI_SWITCH = 2, // multiple PCIe switches (no host bridge traversal)
137+
TOPO_LEVEL_HOST_BRIDGE = 3, // same host bridge
138+
TOPO_LEVEL_NUMA_NODE = 4, // same NUMA node
139+
TOPO_LEVEL_SYSTEM = 5, // cross NUMA (system level)
140+
TOPO_LEVEL_SELF = 6, // same device
141+
TOPO_LEVEL_UNKNOWN = 7 // unknown or error
130142
} TopoLevelType;
131143

132144
// Topology node: represents connection to another device
133145
typedef struct {
134-
char peerUUID[64]; // Peer device UUID
135-
int32_t peerIndex; // Peer device index
136-
TopoLevelType topoLevel; // Topology level to this peer
146+
char peerUUID[64]; // Peer device UUID
147+
int32_t peerIndex; // Peer device index
148+
TopoLevelType topoLevel; // Topology level to this peer
137149
} DeviceTopoNode;
138150

139151
// Maximum number of devices in topology matrix
140152
#define MAX_TOPOLOGY_DEVICES 64
141153

142154
// Device topology row: a device and its topology to all other devices
143155
typedef struct {
144-
char deviceUUID[64]; // This device's UUID
145-
int32_t deviceIndex; // This device's index
146-
int32_t numaNode; // This device's NUMA node
147-
DeviceTopoNode peers[MAX_TOPOLOGY_DEVICES]; // Topology to all other devices
148-
size_t peerCount; // Number of peers
156+
char deviceUUID[64]; // This device's UUID
157+
int32_t deviceIndex; // This device's index
158+
int32_t numaNode; // This device's NUMA node
159+
DeviceTopoNode peers[MAX_TOPOLOGY_DEVICES]; // Topology to all other devices
160+
size_t peerCount; // Number of peers
149161
} DeviceTopologyInfo;
150162

151163
// Extended topology
152164
typedef struct {
153-
DeviceTopologyInfo devices[MAX_TOPOLOGY_DEVICES]; // Array of device topology rows
154-
size_t deviceCount; // Number of devices
165+
DeviceTopologyInfo devices[MAX_TOPOLOGY_DEVICES]; // Array of device topology rows
166+
size_t deviceCount; // Number of devices
155167
} ExtendedDeviceTopology;
156168

157169
// ============================================================================
@@ -216,7 +228,7 @@
216228
} DeviceMetrics;
217229

218230
// Device UUID array entry (for passing multiple UUIDs)
219-
// DEPRECATED: This struct is no longer used. GetDeviceMetrics now uses const char** instead.
231+
// DEPRECATED: This struct is no longer used. AccelGetDeviceMetrics now uses const char** instead.
220232
#define MAX_DEVICE_UUIDS 64
221233
#define UUID_STRING_LENGTH 64
222234

@@ -235,8 +247,8 @@
235247

236248
typedef struct {
237249
PartitionResultType type;
238-
char deviceUUID[64]; // Device UUID
239-
char envVars[10][256]; // Array of environment variable key-value pairs, A=B, C=D, etc.
250+
char deviceUUID[64]; // Device UUID
251+
char envVars[10][256]; // Array of environment variable key-value pairs, A=B, C=D, etc.
240252
} PartitionResult;
241253

242254
// ============================================================================
@@ -246,16 +258,19 @@
246258
/**
247259
* Initialize the accelerator library.
248260
*
261+
* This must be called before any other accelerator API. Calls to other APIs
262+
* without a successful AccelInit will trigger a TF_PANIC.
263+
*
249264
* @return ACCEL_SUCCESS on success, error code otherwise
250265
*/
251-
ACCELERATOR_API AccelResult VGPUInit(void);
266+
ACCELERATOR_API AccelResult AccelInit(void);
252267

253268
/**
254269
* Shutdown the accelerator library.
255270
*
256271
* @return ACCEL_SUCCESS on success, error code otherwise
257272
*/
258-
ACCELERATOR_API AccelResult VGPUShutdown(void);
273+
ACCELERATOR_API AccelResult AccelShutdown(void);
259274

260275
// ============================================================================
261276
// DeviceInfo APIs
@@ -267,7 +282,7 @@
267282
* @param deviceCount Output parameter for number of devices
268283
* @return ACCEL_SUCCESS on success, error code otherwise
269284
*/
270-
ACCELERATOR_API AccelResult GetDeviceCount(size_t* deviceCount);
285+
ACCELERATOR_API AccelResult AccelGetDeviceCount(size_t* deviceCount);
271286

272287
/**
273288
* Get all available devices information.
@@ -277,7 +292,7 @@
277292
* @param deviceCount Output parameter for number of devices actually returned
278293
* @return ACCEL_SUCCESS on success, error code otherwise
279294
*/
280-
ACCELERATOR_API AccelResult GetAllDevices(ExtendedDeviceInfo* devices, size_t maxCount, size_t* deviceCount);
295+
ACCELERATOR_API AccelResult AccelGetAllDevices(ExtendedDeviceInfo* devices, size_t maxCount, size_t* deviceCount);
281296

282297
/**
283298
* Get device topology for all devices, including NVLink and PCIe interconnects.
@@ -286,7 +301,7 @@
286301
* @param topology Output parameter for extended topology (allocated by caller)
287302
* @return ACCEL_SUCCESS on success, error code otherwise
288303
*/
289-
ACCELERATOR_API AccelResult GetAllDevicesTopology(ExtendedDeviceTopology* topology);
304+
ACCELERATOR_API AccelResult AccelGetAllDevicesTopology(ExtendedDeviceTopology* topology);
290305

291306
// ============================================================================
292307
// Virtualization APIs - Partitioned Isolation
@@ -300,7 +315,7 @@
300315
* @param partitionResult Output buffer for assigned partition result (callee allocates)
301316
* @return ACCEL_SUCCESS on success, error code otherwise
302317
*/
303-
ACCELERATOR_API AccelResult AssignPartition(const char* templateId, const char* deviceUUID, PartitionResult* partitionResult);
318+
ACCELERATOR_API AccelResult AccelAssignPartition(const char* templateId, const char* deviceUUID, PartitionResult* partitionResult);
304319

305320
/**
306321
* Remove a partition from a device.
@@ -309,7 +324,7 @@
309324
* @param deviceUUID Device UUID
310325
* @return ACCEL_SUCCESS on success, error code otherwise
311326
*/
312-
ACCELERATOR_API AccelResult RemovePartition(const char* templateId, const char* deviceUUID);
327+
ACCELERATOR_API AccelResult AccelRemovePartition(const char* templateId, const char* deviceUUID);
313328

314329
// ============================================================================
315330
// Virtualization APIs - Hard Isolation
@@ -322,7 +337,7 @@
322337
* @param memoryLimitBytes Memory limit in bytes
323338
* @return ACCEL_SUCCESS on success, error code otherwise
324339
*/
325-
ACCELERATOR_API AccelResult SetMemHardLimit(const char* deviceUUID, uint64_t memoryLimitBytes);
340+
ACCELERATOR_API AccelResult AccelSetMemHardLimit(const char* deviceUUID, uint64_t memoryLimitBytes);
326341

327342
/**
328343
* Set hard compute unit limit for a worker (one-time, called at worker start).
@@ -331,7 +346,7 @@
331346
* @param computeUnitLimit Compute unit limit (e.g., percentage 0-100)
332347
* @return ACCEL_SUCCESS on success, error code otherwise
333348
*/
334-
ACCELERATOR_API AccelResult SetComputeUnitHardLimit(const char* deviceUUID, uint32_t computeUnitLimit);
349+
ACCELERATOR_API AccelResult AccelSetComputeUnitHardLimit(const char* deviceUUID, uint32_t computeUnitLimit);
335350

336351
// ============================================================================
337352
// Virtualization APIs - Device Snapshot/Migration
@@ -349,7 +364,7 @@
349364
* @param context Snapshot context containing process IDs and/or device UUID
350365
* @return ACCEL_SUCCESS on success, error code otherwise
351366
*/
352-
ACCELERATOR_API AccelResult Snapshot(SnapshotContext* context);
367+
ACCELERATOR_API AccelResult AccelSnapshot(SnapshotContext* context);
353368

354369
/**
355370
* Resume device/process state (unlock and restore state).
@@ -363,7 +378,7 @@
363378
* @param context Snapshot context containing process IDs and/or device UUID
364379
* @return ACCEL_SUCCESS on success, error code otherwise
365380
*/
366-
ACCELERATOR_API AccelResult Resume(SnapshotContext* context);
381+
ACCELERATOR_API AccelResult AccelResume(SnapshotContext* context);
367382

368383
// ============================================================================
369384
// Metrics APIs
@@ -379,7 +394,7 @@
379394
* @param processInfoCount Output parameter for number of process infos actually returned
380395
* @return ACCEL_SUCCESS on success, error code otherwise
381396
*/
382-
ACCELERATOR_API AccelResult GetProcessInformation(ProcessInformation* processInfos, size_t maxCount, size_t* processInfoCount);
397+
ACCELERATOR_API AccelResult AccelGetProcessInformation(ProcessInformation* processInfos, size_t maxCount, size_t* processInfoCount);
383398

384399
/**
385400
* Get basic device metrics (power, PCIe, SM active, TC usage, etc.).
@@ -389,7 +404,7 @@
389404
* @param metrics Output buffer for device metrics (allocated by caller, size >= deviceCount)
390405
* @return ACCEL_SUCCESS on success, error code otherwise
391406
*/
392-
ACCELERATOR_API AccelResult GetDeviceMetrics(const char** deviceUUIDs, size_t deviceCount, DeviceMetrics* metrics);
407+
ACCELERATOR_API AccelResult AccelGetDeviceMetrics(const char** deviceUUIDs, size_t deviceCount, DeviceMetrics* metrics);
393408

394409
/**
395410
* Get the vendor mount libs: the mount paths for additional device driver or runtime libraries.
@@ -399,7 +414,7 @@
399414
* @param mountCount Output parameter for number of mounts actually returned
400415
* @return ACCEL_SUCCESS on success, error code otherwise
401416
*/
402-
ACCELERATOR_API AccelResult GetVendorMountLibs(MountPath* mounts, size_t maxCount, size_t* mountCount);
417+
ACCELERATOR_API AccelResult AccelGetVendorMountLibs(MountPath* mounts, size_t maxCount, size_t* mountCount);
403418

404419
// ============================================================================
405420
// Utility APIs
@@ -412,10 +427,11 @@
412427
* @param callback Log callback function pointer (can be NULL to unregister)
413428
* @return ACCEL_SUCCESS on success, error code otherwise
414429
*/
415-
ACCELERATOR_API AccelResult RegisterLogCallback(LogCallbackFunc callback);
430+
ACCELERATOR_API AccelResult AccelRegisterLogCallback(LogCallbackFunc callback);
416431

417432
#ifdef __cplusplus
418433
}
419434
#endif
420435

421-
#endif // ACCELERATOR_H
436+
#endif // ACCELERATOR_H
437+

0 commit comments

Comments
 (0)