Skip to content

Commit 6b1dd70

Browse files
authored
Merge pull request #93 from mresvanis/add-mig-configuration
Add MIG config support when MIG-backed vGPU type
2 parents 5f37569 + b1ea9dd commit 6b1dd70

File tree

292 files changed

+103115
-458
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

292 files changed

+103115
-458
lines changed

api/spec/v1/spec.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ import (
2020
"encoding/json"
2121
"fmt"
2222

23+
migpartedv1 "github.com/NVIDIA/mig-parted/api/spec/v1"
24+
migtypes "github.com/NVIDIA/mig-parted/pkg/types"
25+
2326
"github.com/NVIDIA/vgpu-device-manager/pkg/types"
2427
)
2528

@@ -160,6 +163,44 @@ func (s *VGPUConfigSpec) UnmarshalJSON(b []byte) error {
160163
return nil
161164
}
162165

166+
func (s VGPUConfigSpecSlice) ToMigConfigSpecSlice() (migpartedv1.MigConfigSpecSlice, error) {
167+
var migConfigSpecs migpartedv1.MigConfigSpecSlice
168+
169+
for _, vgpuSpec := range s {
170+
migSpec := migpartedv1.MigConfigSpec{
171+
DeviceFilter: vgpuSpec.DeviceFilter,
172+
Devices: vgpuSpec.Devices,
173+
MigDevices: make(migtypes.MigConfig),
174+
}
175+
176+
migEnabled := false
177+
for vgpuType := range vgpuSpec.VGPUDevices {
178+
vgpu, err := types.ParseVGPUType(vgpuType)
179+
if err != nil {
180+
return nil, fmt.Errorf("failed to parse vGPU type %s: %w", vgpuType, err)
181+
}
182+
183+
if vgpu.G > 0 {
184+
migEnabled = true
185+
migProfile := fmt.Sprintf("%dg.%dgb", vgpu.G, vgpu.GB)
186+
for _, attr := range vgpu.Attr {
187+
if attr == types.AttributeMediaExtensions {
188+
migProfile += ".me"
189+
break
190+
}
191+
}
192+
migSpec.MigDevices[migProfile] = vgpuSpec.VGPUDevices[vgpuType]
193+
}
194+
}
195+
196+
migSpec.MigEnabled = migEnabled
197+
198+
migConfigSpecs = append(migConfigSpecs, migSpec)
199+
}
200+
201+
return migConfigSpecs, nil
202+
}
203+
163204
func containsKey(m map[string]json.RawMessage, s string) bool {
164205
_, exists := m[s]
165206
return exists

api/spec/v1/spec_test.go

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ import (
2121

2222
"github.com/stretchr/testify/require"
2323
"sigs.k8s.io/yaml"
24+
25+
migpartedv1 "github.com/NVIDIA/mig-parted/api/spec/v1"
26+
migtypes "github.com/NVIDIA/mig-parted/pkg/types"
27+
28+
"github.com/NVIDIA/vgpu-device-manager/pkg/types"
2429
)
2530

2631
func TestSpec(t *testing.T) {
@@ -230,3 +235,190 @@ func TestVGPUConfigSpec(t *testing.T) {
230235
}
231236

232237
}
238+
239+
func TestVGPUConfigSpecSliceToMigConfigSpecSlice(t *testing.T) {
240+
testCases := []struct {
241+
Description string
242+
VGPUConfigSpecSlice VGPUConfigSpecSlice
243+
ExpectedMigConfigSpec migpartedv1.MigConfigSpecSlice
244+
ExpectedError string
245+
}{
246+
{
247+
"Empty slice",
248+
VGPUConfigSpecSlice{},
249+
nil,
250+
"",
251+
},
252+
{
253+
"Single MIG-backed vGPU type",
254+
VGPUConfigSpecSlice{
255+
{
256+
DeviceFilter: "MODEL",
257+
Devices: "all",
258+
VGPUDevices: types.VGPUConfig{
259+
"A100-1-5C": 4,
260+
},
261+
},
262+
},
263+
migpartedv1.MigConfigSpecSlice{
264+
{
265+
DeviceFilter: "MODEL",
266+
Devices: "all",
267+
MigEnabled: true,
268+
MigDevices: migtypes.MigConfig{
269+
"1g.5gb": 4,
270+
},
271+
},
272+
},
273+
"",
274+
},
275+
{
276+
"Multiple MIG-backed vGPU types",
277+
VGPUConfigSpecSlice{
278+
{
279+
DeviceFilter: []string{"MODEL1", "MODEL2"},
280+
Devices: []int{0, 1},
281+
VGPUDevices: types.VGPUConfig{
282+
"A100-1-5C": 2,
283+
"A100-2-10C": 1,
284+
},
285+
},
286+
},
287+
migpartedv1.MigConfigSpecSlice{
288+
{
289+
DeviceFilter: []string{"MODEL1", "MODEL2"},
290+
Devices: []int{0, 1},
291+
MigEnabled: true,
292+
MigDevices: migtypes.MigConfig{
293+
"1g.5gb": 2,
294+
"2g.10gb": 1,
295+
},
296+
},
297+
},
298+
"",
299+
},
300+
{
301+
"MIG-backed vGPU type with media extensions",
302+
VGPUConfigSpecSlice{
303+
{
304+
Devices: "all",
305+
VGPUDevices: types.VGPUConfig{
306+
"A100-1-5CME": 2,
307+
},
308+
},
309+
},
310+
migpartedv1.MigConfigSpecSlice{
311+
{
312+
Devices: "all",
313+
MigEnabled: true,
314+
MigDevices: migtypes.MigConfig{
315+
"1g.5gb.me": 2,
316+
},
317+
},
318+
},
319+
"",
320+
},
321+
{
322+
"Non-MIG vGPU type",
323+
VGPUConfigSpecSlice{
324+
{
325+
Devices: "all",
326+
VGPUDevices: types.VGPUConfig{
327+
"A100-40C": 2,
328+
},
329+
},
330+
},
331+
migpartedv1.MigConfigSpecSlice{
332+
{
333+
Devices: "all",
334+
MigEnabled: false,
335+
MigDevices: migtypes.MigConfig{},
336+
},
337+
},
338+
"",
339+
},
340+
{
341+
"Mixed MIG and non-MIG vGPU types",
342+
VGPUConfigSpecSlice{
343+
{
344+
Devices: "all",
345+
VGPUDevices: types.VGPUConfig{
346+
"A100-40C": 1,
347+
"A100-1-5C": 2,
348+
},
349+
},
350+
},
351+
migpartedv1.MigConfigSpecSlice{
352+
{
353+
Devices: "all",
354+
MigEnabled: true,
355+
MigDevices: migtypes.MigConfig{
356+
"1g.5gb": 2,
357+
},
358+
},
359+
},
360+
"",
361+
},
362+
{
363+
"Multiple specs with different configurations",
364+
VGPUConfigSpecSlice{
365+
{
366+
Devices: "all",
367+
VGPUDevices: types.VGPUConfig{
368+
"A100-1-5C": 4,
369+
},
370+
},
371+
{
372+
DeviceFilter: "MODEL",
373+
Devices: []int{0, 1},
374+
VGPUDevices: types.VGPUConfig{
375+
"A100-40C": 1,
376+
},
377+
},
378+
},
379+
migpartedv1.MigConfigSpecSlice{
380+
{
381+
Devices: "all",
382+
MigEnabled: true,
383+
MigDevices: migtypes.MigConfig{
384+
"1g.5gb": 4,
385+
},
386+
},
387+
{
388+
DeviceFilter: "MODEL",
389+
Devices: []int{0, 1},
390+
MigEnabled: false,
391+
MigDevices: migtypes.MigConfig{},
392+
},
393+
},
394+
"",
395+
},
396+
{
397+
"Invalid vGPU type",
398+
VGPUConfigSpecSlice{
399+
{
400+
Devices: "all",
401+
VGPUDevices: types.VGPUConfig{
402+
"InvalidType": 1,
403+
},
404+
},
405+
},
406+
nil,
407+
"failed to parse vGPU type InvalidType:",
408+
},
409+
}
410+
411+
for _, tc := range testCases {
412+
t.Run(tc.Description, func(t *testing.T) {
413+
result, err := tc.VGPUConfigSpecSlice.ToMigConfigSpecSlice()
414+
if tc.ExpectedError != "" {
415+
require.NotNil(t, err, "Expected failure but got success")
416+
require.Nil(t, result, "Expected nil result on failure")
417+
require.ErrorContains(t, err, tc.ExpectedError)
418+
} else {
419+
require.Nil(t, err, "Unexpected failure: %v", err)
420+
require.Equal(t, tc.ExpectedMigConfigSpec, result, "Unexpected result")
421+
}
422+
})
423+
}
424+
}

cmd/nvidia-k8s-vgpu-dm/find.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
# Copyright 2025 NVIDIA CORPORATION
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"fmt"
21+
"path/filepath"
22+
)
23+
24+
// root is the path of a driver root; its methods search for driver files
// located beneath that path.
type root string
25+
26+
// getDriverLibraryPath returns path to `libnvidia-ml.so.1` in the driver root.
27+
// The folder for this file is also expected to be the location of other driver files.
28+
func (r root) getDriverLibraryPath() (string, error) {
29+
librarySearchPaths := []string{
30+
"/usr/lib64",
31+
"/usr/lib/x86_64-linux-gnu",
32+
"/usr/lib/aarch64-linux-gnu",
33+
"/lib64",
34+
"/lib/x86_64-linux-gnu",
35+
"/lib/aarch64-linux-gnu",
36+
}
37+
38+
libraryPath, err := r.findFile("libnvidia-ml.so.1", librarySearchPaths...)
39+
if err != nil {
40+
return "", err
41+
}
42+
43+
return libraryPath, nil
44+
}
45+
46+
// findFile searches the root for a specified file.
47+
// A number of folders can be specified to search in addition to the root itself.
48+
// If the file represents a symlink, this is resolved and the final path is returned.
49+
func (r root) findFile(name string, searchIn ...string) (string, error) {
50+
for _, d := range append([]string{"/"}, searchIn...) {
51+
l := filepath.Join(string(r), d, name)
52+
candidate, err := resolveLink(l)
53+
if err != nil {
54+
continue
55+
}
56+
return candidate, nil
57+
}
58+
59+
return "", fmt.Errorf("error locating %q", name)
60+
}
61+
62+
// resolveLink finds the target of a symlink or the file itself in the
63+
// case of a regular file.
64+
// This is equivalent to running `readlink -f ${l}`.
65+
func resolveLink(l string) (string, error) {
66+
resolved, err := filepath.EvalSymlinks(l)
67+
if err != nil {
68+
return "", fmt.Errorf("error resolving link '%s': %w", l, err)
69+
}
70+
return resolved, nil
71+
}

0 commit comments

Comments
 (0)