Skip to content

Commit 55084f7

Browse files
authored
Merge pull request #129 from nojnhuh/profiles
Introduce device profiles
2 parents bb67e6b + d57f671 commit 55084f7

File tree

21 files changed

+679
-418
lines changed

21 files changed

+679
-418
lines changed

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,20 @@ Finally, you can run the following to cleanup your environment and delete the
375375
./demo/delete-cluster.sh
376376
```
377377

378+
## Device Profiles
379+
380+
The example driver can manage several different kinds of devices to demonstrate
381+
a variety of DRA features. The functionality for each kind of device is
382+
organized into a "profile." Only one profile is active at a time for a given
383+
instance of the example driver, though the example driver may be installed
384+
multiple times in the same cluster with different active profiles. See the Helm
385+
chart's `deviceProfile` value in values.yaml for available profiles.
386+
387+
For driver developers, this pattern is specific to the example driver and not
388+
intended to be a recommendation for all DRA drivers. Other drivers will likely
389+
be simpler by implementing their logic more directly than through an
390+
abstraction like the example driver's profiles.
391+
378392
## Anatomy of a DRA resource driver
379393

380394
TBD

api/example.com/resource/gpu/v1alpha1/api.go

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,9 @@ import (
2020
"fmt"
2121

2222
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23-
"k8s.io/apimachinery/pkg/runtime"
24-
"k8s.io/apimachinery/pkg/runtime/schema"
25-
"k8s.io/apimachinery/pkg/runtime/serializer/json"
2623
)
2724

28-
const (
29-
GroupName = "gpu.resource.example.com"
30-
Version = "v1alpha1"
31-
32-
GpuConfigKind = "GpuConfig"
33-
)
34-
35-
// Decoder implements a decoder for objects in this API group.
36-
var Decoder runtime.Decoder
25+
const GpuConfigKind = "GpuConfig"
3726

3827
// +genclient
3928
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -82,29 +71,3 @@ func (c *GpuConfig) Normalize() error {
8271
}
8372
return nil
8473
}
85-
86-
func init() {
87-
// Create a new scheme and add our types to it. If at some point in the
88-
// future a new version of the configuration API becomes necessary, then
89-
// conversion functions can be generated and registered to continue
90-
// supporting older versions.
91-
scheme := runtime.NewScheme()
92-
schemeGroupVersion := schema.GroupVersion{
93-
Group: GroupName,
94-
Version: Version,
95-
}
96-
scheme.AddKnownTypes(schemeGroupVersion,
97-
&GpuConfig{},
98-
)
99-
metav1.AddToGroupVersion(scheme, schemeGroupVersion)
100-
101-
// Set up a json serializer to decode our types.
102-
Decoder = json.NewSerializerWithOptions(
103-
json.DefaultMetaFactory,
104-
scheme,
105-
scheme,
106-
json.SerializerOptions{
107-
Pretty: true, Strict: true,
108-
},
109-
)
110-
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Copyright The Kubernetes Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
"k8s.io/apimachinery/pkg/runtime"
22+
"k8s.io/apimachinery/pkg/runtime/schema"
23+
)
24+
25+
const (
26+
GroupName = "gpu.resource.example.com"
27+
Version = "v1alpha1"
28+
)
29+
30+
// SchemeGroupVersion is the group version used to register these objects.
31+
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: Version}
32+
33+
var (
34+
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
35+
AddToScheme = SchemeBuilder.AddToScheme
36+
)
37+
38+
// addKnownTypes adds the list of known types to the given scheme.
39+
func addKnownTypes(scheme *runtime.Scheme) error {
40+
scheme.AddKnownTypes(SchemeGroupVersion,
41+
&GpuConfig{},
42+
)
43+
metav1.AddToGroupVersion(scheme, SchemeGroupVersion)
44+
return nil
45+
}

cmd/dra-example-kubeletplugin/cdi.go

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,50 +19,52 @@ package main
1919
import (
2020
"fmt"
2121
"os"
22-
23-
"sigs.k8s.io/dra-example-driver/pkg/consts"
22+
"regexp"
23+
"strings"
2424

2525
cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
2626
cdiparser "tags.cncf.io/container-device-interface/pkg/parser"
2727
cdispec "tags.cncf.io/container-device-interface/specs-go"
28+
29+
"sigs.k8s.io/dra-example-driver/internal/profiles"
2830
)
2931

30-
const (
31-
cdiVendor = "k8s." + consts.DriverName
32-
cdiClass = "gpu"
33-
cdiKind = cdiVendor + "/" + cdiClass
32+
const cdiCommonDeviceName = "common"
3433

35-
cdiCommonDeviceName = "common"
36-
)
34+
var nonWord = regexp.MustCompile(`[^a-zA-Z0-9]+`)
3735

3836
type CDIHandler struct {
39-
cache *cdiapi.Cache
37+
cache *cdiapi.Cache
38+
driverName string
39+
class string
4040
}
4141

42-
func NewCDIHandler(config *Config) (*CDIHandler, error) {
42+
func NewCDIHandler(root string, driverName, class string) (*CDIHandler, error) {
4343
cache, err := cdiapi.NewCache(
44-
cdiapi.WithSpecDirs(config.flags.cdiRoot),
44+
cdiapi.WithSpecDirs(root),
4545
)
4646
if err != nil {
4747
return nil, fmt.Errorf("unable to create a new CDI cache: %w", err)
4848
}
4949
handler := &CDIHandler{
50-
cache: cache,
50+
cache: cache,
51+
driverName: driverName,
52+
class: class,
5153
}
5254

5355
return handler, nil
5456
}
5557

5658
func (cdi *CDIHandler) CreateCommonSpecFile() error {
5759
spec := &cdispec.Spec{
58-
Kind: cdiKind,
60+
Kind: cdi.kind(),
5961
Devices: []cdispec.Device{
6062
{
6163
Name: cdiCommonDeviceName,
6264
ContainerEdits: cdispec.ContainerEdits{
6365
Env: []string{
6466
fmt.Sprintf("KUBERNETES_NODE_NAME=%s", os.Getenv("NODE_NAME")),
65-
fmt.Sprintf("DRA_RESOURCE_DRIVER_NAME=%s", consts.DriverName),
67+
fmt.Sprintf("DRA_RESOURCE_DRIVER_NAME=%s", cdi.driverName),
6668
},
6769
},
6870
},
@@ -83,19 +85,20 @@ func (cdi *CDIHandler) CreateCommonSpecFile() error {
8385
return cdi.cache.WriteSpec(spec, specName)
8486
}
8587

86-
func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, devices PreparedDevices) error {
87-
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiClass, claimUID)
88+
func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, devices profiles.PreparedDevices) error {
89+
specName := cdiapi.GenerateTransientSpecName(cdi.vendor(), cdi.class, claimUID)
8890

8991
spec := &cdispec.Spec{
90-
Kind: cdiKind,
92+
Kind: cdi.kind(),
9193
Devices: []cdispec.Device{},
9294
}
9395

9496
for _, device := range devices {
97+
deviceEnvKey := strings.ToUpper(nonWord.ReplaceAllString(device.DeviceName, "_"))
9598
claimEdits := cdiapi.ContainerEdits{
9699
ContainerEdits: &cdispec.ContainerEdits{
97100
Env: []string{
98-
fmt.Sprintf("GPU_DEVICE_%s_RESOURCE_CLAIM=%s", device.DeviceName[4:], claimUID),
101+
fmt.Sprintf("%s_DEVICE_%s_RESOURCE_CLAIM=%s", strings.ToUpper(cdi.class), deviceEnvKey, claimUID),
99102
},
100103
},
101104
}
@@ -119,19 +122,27 @@ func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, devices PreparedDevi
119122
}
120123

121124
func (cdi *CDIHandler) DeleteClaimSpecFile(claimUID string) error {
122-
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiClass, claimUID)
125+
specName := cdiapi.GenerateTransientSpecName(cdi.vendor(), cdi.class, claimUID)
123126
return cdi.cache.RemoveSpec(specName)
124127
}
125128

126129
func (cdi *CDIHandler) GetClaimDevices(claimUID string, devices []string) []string {
127130
cdiDevices := []string{
128-
cdiparser.QualifiedName(cdiVendor, cdiClass, cdiCommonDeviceName),
131+
cdiparser.QualifiedName(cdi.vendor(), cdi.class, cdiCommonDeviceName),
129132
}
130133

131134
for _, device := range devices {
132-
cdiDevice := cdiparser.QualifiedName(cdiVendor, cdiClass, fmt.Sprintf("%s-%s", claimUID, device))
135+
cdiDevice := cdiparser.QualifiedName(cdi.vendor(), cdi.class, fmt.Sprintf("%s-%s", claimUID, device))
133136
cdiDevices = append(cdiDevices, cdiDevice)
134137
}
135138

136139
return cdiDevices
137140
}
141+
142+
func (cdi *CDIHandler) kind() string {
143+
return cdi.vendor() + "/" + cdi.class
144+
}
145+
146+
func (cdi *CDIHandler) vendor() string {
147+
return "k8s." + cdi.driverName
148+
}

cmd/dra-example-kubeletplugin/discovery.go

Lines changed: 0 additions & 84 deletions
This file was deleted.

cmd/dra-example-kubeletplugin/driver.go

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,13 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23-
"maps"
2423

2524
resourceapi "k8s.io/api/resource/v1"
2625
"k8s.io/apimachinery/pkg/types"
2726
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2827
coreclientset "k8s.io/client-go/kubernetes"
2928
"k8s.io/dynamic-resource-allocation/kubeletplugin"
30-
"k8s.io/dynamic-resource-allocation/resourceslice"
3129
"k8s.io/klog/v2"
32-
33-
"sigs.k8s.io/dra-example-driver/pkg/consts"
3430
)
3531

3632
type driver struct {
@@ -58,7 +54,7 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
5854
driver,
5955
kubeletplugin.KubeClient(config.coreclient),
6056
kubeletplugin.NodeName(config.flags.nodeName),
61-
kubeletplugin.DriverName(consts.DriverName),
57+
kubeletplugin.DriverName(config.flags.driverName),
6258
kubeletplugin.RegistrarDirectoryPath(config.flags.kubeletRegistrarDirectoryPath),
6359
kubeletplugin.PluginDataDirectoryPath(config.DriverPluginPath()),
6460
)
@@ -67,28 +63,12 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
6763
}
6864
driver.helper = helper
6965

70-
devices := make([]resourceapi.Device, 0, len(state.allocatable))
71-
for device := range maps.Values(state.allocatable) {
72-
devices = append(devices, device)
73-
}
74-
resources := resourceslice.DriverResources{
75-
Pools: map[string]resourceslice.Pool{
76-
config.flags.nodeName: {
77-
Slices: []resourceslice.Slice{
78-
{
79-
Devices: devices,
80-
},
81-
},
82-
},
83-
},
84-
}
85-
8666
driver.healthcheck, err = startHealthcheck(ctx, config)
8767
if err != nil {
8868
return nil, fmt.Errorf("start healthcheck: %w", err)
8969
}
9070

91-
if err := helper.PublishResources(ctx, resources); err != nil {
71+
if err := helper.PublishResources(ctx, state.driverResources); err != nil {
9272
return nil, err
9373
}
9474

0 commit comments

Comments
 (0)