Skip to content

Commit 4e59b95

Browse files
committed
Add the memory addressing mode as an attribute for each device
On coherent-memory systems, the possible modes are: * HMM - (Heterogeneous Memory Management mode) * ATS - (Address Translation Services mode) * None - (Supported by the platform but currently inactive) On other systems, it is explicitly set to an empty string to indicate that it is not supported by the platform. Signed-off-by: Shiva Krishna, Merla <smerla@nvidia.com>
1 parent 869744f commit 4e59b95

File tree

2 files changed

+19
-0
lines changed

2 files changed

+19
-0
lines changed

cmd/gpu-kubelet-plugin/deviceinfo.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ type GpuInfo struct {
5353
pcieRootAttr *deviceattribute.DeviceAttribute
5454
migProfiles []*MigProfileInfo
5555
Health HealthStatus
56+
addressingMode string
5657
}
5758

5859
type MigDeviceInfo struct {
@@ -139,6 +140,9 @@ func (d *GpuInfo) GetDevice() resourceapi.Device {
139140
"pcieBusID": {
140141
StringValue: &d.pcieBusID,
141142
},
143+
"addressingMode": {
144+
StringValue: &d.addressingMode,
145+
},
142146
},
143147
Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{
144148
"memory": {
@@ -189,6 +193,9 @@ func (d *MigDeviceInfo) GetDevice() resourceapi.Device {
189193
"pcieBusID": {
190194
StringValue: &d.pcieBusID,
191195
},
196+
"addressingMode": {
197+
StringValue: &d.parent.addressingMode,
198+
},
192199
},
193200
Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{
194201
"multiprocessors": {

cmd/gpu-kubelet-plugin/nvlib.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,17 @@ func (l deviceLib) getGpuInfo(index int, device nvdev.Device) (*GpuInfo, error)
304304
return nil, fmt.Errorf("error getting PCIe bus ID for device %d: %w", index, err)
305305
}
306306

307+
// Get the memory-addressing mode supported by the device.
308+
// On coherent-memory systems, the possible modes are:
309+
// - HMM (Heterogeneous Memory Management)
310+
// - ATS (Address Translation Services)
311+
// - None (Supported by the platform but currently inactive)
312+
// - "" (Not supported by the platform)
313+
addressingMode, err := device.GetAddressingModeAsString()
314+
if err != nil {
315+
return nil, fmt.Errorf("error getting addressing mode for device %d: %w", index, err)
316+
}
317+
307318
var pcieRootAttr *deviceattribute.DeviceAttribute
308319
if attr, err := deviceattribute.GetPCIeRootAttributeByPCIBusID(pcieBusID); err == nil {
309320
pcieRootAttr = &attr
@@ -379,6 +390,7 @@ func (l deviceLib) getGpuInfo(index int, device nvdev.Device) (*GpuInfo, error)
379390
pcieRootAttr: pcieRootAttr,
380391
migProfiles: migProfiles,
381392
Health: Healthy,
393+
addressingMode: addressingMode,
382394
}
383395

384396
return gpuInfo, nil

0 commit comments

Comments
 (0)