Skip to content

Commit 8b5687b

Browse files
authored
DRAAdminAccess: add example (#112)
* DRAAdminAccess: add example Signed-off-by: Rita Zhang <rita.z.zhang@gmail.com> * address comments and make it os agnostic Signed-off-by: Rita Zhang <rita.z.zhang@gmail.com> * address comments Signed-off-by: Rita Zhang <rita.z.zhang@gmail.com> * address comments Signed-off-by: Rita Zhang <rita.z.zhang@gmail.com> --------- Signed-off-by: Rita Zhang <rita.z.zhang@gmail.com>
1 parent f47c6b1 commit 8b5687b

File tree

10 files changed

+235
-14
lines changed

10 files changed

+235
-14
lines changed

README.md

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,58 @@ You can use the IDs of the GPUs as well as the GPU sharing settings set in
348348
these environment variables to verify that they were handed out in a way
349349
consistent with the semantics shown in the figure above.
350350

351+
### Demo DRA Admin Access Feature
352+
This example driver includes support for the [DRA AdminAccess feature](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/#admin-access), which allows administrators to gain privileged access to devices already in use by other users. The driver demonstrates the end-to-end flow by setting the `DRA_ADMIN_ACCESS` environment variable in the containers that use the claim. A driver managing real devices could use the same hook to expose host hardware information.
353+
354+
#### Usage Example
355+
356+
See `demo/gpu-test7.yaml` for a complete example. Key points:
357+
358+
1. **Namespace**: On Kubernetes v1.34+, the namespace must carry the `resource.kubernetes.io/admin-access: "true"` label; otherwise a ResourceClaimTemplate or ResourceClaim with `adminAccess: true` cannot be created in it.
359+
```yaml
360+
apiVersion: v1
361+
kind: Namespace
362+
metadata:
363+
name: gpu-test7
364+
labels:
365+
resource.kubernetes.io/admin-access: "true"
366+
```
367+
368+
2. **Resource Claim Template**: Request must have `adminAccess: true`. The `allocationMode: All` is used to demonstrate accessing all available devices with admin privileges.
369+
```yaml
370+
spec:
371+
spec:
372+
devices:
373+
requests:
374+
- name: admin-gpu
375+
exactly:
376+
deviceClassName: gpu.example.com
377+
allocationMode: All
378+
adminAccess: true
379+
```
380+
381+
3. **Container**: Will receive elevated privileges from the driver, represented here as the `DRA_ADMIN_ACCESS` environment variable.
382+
```bash
383+
echo "DRA Admin Access: $DRA_ADMIN_ACCESS"
384+
# Example output:
385+
# DRA Admin Access: true
386+
```
387+
388+
#### Testing
389+
390+
To run this demo:
391+
```bash
392+
./demo/test-admin-access.sh
393+
```
394+
395+
This demonstration shows the end-to-end flow of the DRA AdminAccess feature. In a production environment, drivers could use this admin access indication to provide additional privileged capabilities or information to authorized workloads.
396+
397+
### Clean Up
398+
351399
Once you have verified everything is running correctly, delete all of the
352400
example apps:
353401
```bash
354-
kubectl delete --wait=false --filename=demo/gpu-test{1,2,3,4,5}.yaml
402+
kubectl delete --wait=false --filename=demo/gpu-test{1,2,3,4,5,7}.yaml
355403
```
356404

357405
And wait for them to terminate:
@@ -366,6 +414,7 @@ gpu-test3 pod0 1/1 Terminating 0 31m
366414
gpu-test3 pod1 1/1 Terminating 0 31m
367415
gpu-test4 pod0 1/1 Terminating 0 31m
368416
gpu-test5 pod0 4/4 Terminating 0 31m
417+
gpu-test7 pod0 1/1 Terminating 0 31m
369418
...
370419
```
371420

cmd/dra-example-kubeletplugin/cdi.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023 The Kubernetes Authors.
2+
* Copyright The Kubernetes Authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -99,9 +99,13 @@ func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, devices profiles.Pre
9999
ContainerEdits: &cdispec.ContainerEdits{
100100
Env: []string{
101101
fmt.Sprintf("%s_DEVICE_%s_RESOURCE_CLAIM=%s", strings.ToUpper(cdi.class), deviceEnvKey, claimUID),
102+
fmt.Sprintf("DRA_ADMIN_ACCESS=%t", device.AdminAccess),
102103
},
103104
},
104105
}
106+
107+
// If this device has admin access, then here is where to inject host hardware information
108+
105109
claimEdits.Append(device.ContainerEdits)
106110

107111
cdiDevice := cdispec.Device{

cmd/dra-example-kubeletplugin/driver.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023 The Kubernetes Authors.
2+
* Copyright The Kubernetes Authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -95,8 +95,10 @@ func (d *driver) PrepareResourceClaims(ctx context.Context, claims []*resourceap
9595
}
9696

9797
func (d *driver) prepareResourceClaim(_ context.Context, claim *resourceapi.ResourceClaim) kubeletplugin.PrepareResult {
98+
klog.Infof("Preparing claim: UID=%s, Namespace=%s, Name=%s", claim.UID, claim.Namespace, claim.Name)
9899
preparedPBs, err := d.state.Prepare(claim)
99100
if err != nil {
101+
klog.Errorf("Error preparing devices for claim %v: %v", claim.UID, err)
100102
return kubeletplugin.PrepareResult{
101103
Err: fmt.Errorf("error preparing devices for claim %v: %w", claim.UID, err),
102104
}

cmd/dra-example-kubeletplugin/state.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023 The Kubernetes Authors.
2+
* Copyright The Kubernetes Authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -139,7 +139,6 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi
139139
if preparedClaims[claimUID] != nil {
140140
return preparedClaims[claimUID].GetDevices(), nil
141141
}
142-
143142
preparedDevices, err := s.prepareDevices(claim)
144143
if err != nil {
145144
return nil, fmt.Errorf("prepare failed: %v", err)
@@ -163,7 +162,10 @@ func (s *DeviceState) Unprepare(claimUID string) error {
163162

164163
checkpoint := newCheckpoint()
165164
if err := s.checkpointManager.GetCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil {
166-
return fmt.Errorf("unable to sync from checkpoint: %v", err)
165+
checkpoint = newCheckpoint()
166+
if err := s.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil {
167+
return fmt.Errorf("unable to create new checkpoint: %v", err)
168+
}
167169
}
168170
preparedClaims := checkpoint.V1.PreparedClaims
169171

@@ -192,6 +194,8 @@ func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (profiles
192194
if claim.Status.Allocation == nil {
193195
return nil, fmt.Errorf("claim not yet allocated")
194196
}
197+
// Check if any device request has admin access
198+
hasAdminAccess := s.checkAdminAccess(claim)
195199

196200
// Retrieve the full set of device configs for the driver.
197201
configs, err := GetOpaqueDeviceConfigs(
@@ -257,6 +261,7 @@ func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (profiles
257261
CdiDeviceIds: s.cdi.GetClaimDevices(string(claim.UID), []string{result.Device}),
258262
},
259263
ContainerEdits: perDeviceCDIContainerEdits[result.Device],
264+
AdminAccess: hasAdminAccess,
260265
}
261266
preparedDevices = append(preparedDevices, device)
262267
}
@@ -269,6 +274,18 @@ func (s *DeviceState) unprepareDevices(claimUID string, devices profiles.Prepare
269274
return nil
270275
}
271276

277+
// checkAdminAccess determines if a resource claim requires admin access.
278+
func (s *DeviceState) checkAdminAccess(claim *resourceapi.ResourceClaim) bool {
279+
if claim != nil && claim.Status.Allocation != nil {
280+
for _, result := range claim.Status.Allocation.Devices.Results {
281+
if result.AdminAccess != nil && *result.AdminAccess {
282+
return true
283+
}
284+
}
285+
}
286+
return false
287+
}
288+
272289
// GetOpaqueDeviceConfigs returns an ordered list of the configs contained in possibleConfigs for this driver.
273290
//
274291
// Configs can either come from the resource claim itself or from the device

demo/gpu-test7.yaml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# One Namespace with admin access label
2+
# One pod with one container requesting all GPUs with admin access
3+
# This demo shows the DRA admin access feature with DRA_ADMIN_ACCESS environment variable
4+
5+
---
6+
apiVersion: v1
7+
kind: Namespace
8+
metadata:
9+
name: gpu-test7
10+
labels:
11+
resource.kubernetes.io/admin-access: "true"
12+
---
13+
apiVersion: resource.k8s.io/v1
14+
kind: ResourceClaimTemplate
15+
metadata:
16+
namespace: gpu-test7
17+
name: multiple-gpus-admin
18+
spec:
19+
spec:
20+
devices:
21+
requests:
22+
- name: admin-gpu
23+
exactly:
24+
deviceClassName: gpu.example.com
25+
allocationMode: All
26+
adminAccess: true
27+
28+
---
29+
apiVersion: v1
30+
kind: Pod
31+
metadata:
32+
namespace: gpu-test7
33+
name: pod0
34+
spec:
35+
containers:
36+
- name: ctr0
37+
image: ubuntu:22.04
38+
command: ["bash", "-c"]
39+
args:
40+
- |
41+
export
42+
echo "=== DRA Admin Access Demo ==="
43+
echo "DRA Admin Access: $DRA_ADMIN_ACCESS"
44+
echo ""
45+
echo "GPU Environment Variables:"
46+
env | grep GPU_ | sort
47+
echo ""
48+
echo "=== Sleeping to allow inspection ==="
49+
trap 'exit 0' TERM
50+
sleep 9999 & wait
51+
resources:
52+
claims:
53+
- name: admin-gpus
54+
resourceClaims:
55+
- name: admin-gpus
56+
resourceClaimTemplateName: multiple-gpus-admin

demo/scripts/kind-cluster-config.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ kind: Cluster
22
apiVersion: kind.x-k8s.io/v1alpha4
33
featureGates:
44
DynamicResourceAllocation: true
5+
DRAAdminAccess: true
56
containerdConfigPatches:
67
# Enable CDI as described in
78
# https://tags.cncf.io/container-device-interface#containerd-configuration
@@ -17,11 +18,11 @@ nodes:
1718
extraArgs:
1819
runtime-config: "resource.k8s.io/v1beta1=true"
1920
scheduler:
20-
extraArgs:
21-
v: "1"
21+
extraArgs:
22+
v: "1"
2223
controllerManager:
23-
extraArgs:
24-
v: "1"
24+
extraArgs:
25+
v: "1"
2526
- |
2627
kind: InitConfiguration
2728
nodeRegistration:

demo/test-admin-access.sh

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/usr/bin/env bash

# Copyright The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script demonstrates the DRA Admin Access feature by deploying
# the demo and verifying the DRA_ADMIN_ACCESS environment variable is set.
#
# Run it from the repository root: the manifest is referenced through the
# relative path demo/gpu-test7.yaml.
#
# Exit status: 0 when pod0 reports DRA_ADMIN_ACCESS=true, non-zero otherwise
# (kubectl missing, cluster unreachable, apply failure, or wrong value).

set -euo pipefail

echo "=== DRA Admin Access Feature Test ==="
echo

# Check if kubectl is available
if ! command -v kubectl &> /dev/null; then
    echo "❌ kubectl is not available. Please install kubectl and ensure cluster access."
    exit 1
fi

# Check if the cluster is accessible
if ! kubectl cluster-info &> /dev/null; then
    echo "❌ Unable to access Kubernetes cluster. Please check your kubeconfig."
    exit 1
fi

echo "✅ Kubernetes cluster is accessible"

# Apply the demo
echo "📦 Applying gpu-test7.yaml demo..."
kubectl apply -f demo/gpu-test7.yaml

echo "⏳ Waiting for pod to be ready..."
# Best effort: a timeout must not abort the script, so that the diagnostics
# below are still printed. The final verdict comes from the variable check.
kubectl wait --for=condition=Ready pod/pod0 -n gpu-test7 --timeout=120s || true

echo
echo "=== Pod Status ==="
kubectl get pods -n gpu-test7

echo
echo "=== ResourceClaims Status ==="
kubectl get resourceclaims -n gpu-test7

echo
echo "=== Pod0 Logs (showing admin access demo) ==="
kubectl logs pod0 -n gpu-test7 || echo "⚠️ Pod0 logs not ready yet"

echo
echo "=== Checking DRA_ADMIN_ACCESS Environment Variable ==="
DRA_ADMIN_ACCESS_POD0=$(kubectl exec pod0 -n gpu-test7 -- printenv DRA_ADMIN_ACCESS 2>/dev/null || echo "not found")

# Record the verdict instead of exiting right away so the clean-up hint is
# always shown, then propagate it as the exit status of the script.
status=0
if [[ "$DRA_ADMIN_ACCESS_POD0" == "true" ]]; then
    echo "✅ Pod0: DRA_ADMIN_ACCESS=$DRA_ADMIN_ACCESS_POD0"
else
    echo "❌ Pod0: DRA_ADMIN_ACCESS=$DRA_ADMIN_ACCESS_POD0 (expected: true)"
    status=1
fi

echo
echo "=== Test Complete ==="
echo "To clean up, run: kubectl delete namespace gpu-test7"

exit "$status"

deployments/helm/dra-example-driver/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ serviceAccount:
3737
kubeletPlugin:
3838
# numDevices describes how many GPUs to advertise on each node when the "gpu"
3939
# deviceProfile is used. Not relevant for other profiles.
40-
numDevices: 8
40+
numDevices: 9
4141
priorityClassName: "system-node-critical"
4242
updateStrategy:
4343
type: RollingUpdate

internal/profiles/profiles.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type PerDeviceCDIContainerEdits map[string]*cdiapi.ContainerEdits
3131
type PreparedDevice struct {
3232
drapbv1.Device
3333
ContainerEdits *cdiapi.ContainerEdits
34+
AdminAccess bool
3435
}
3536

3637
type PreparedDevices []*PreparedDevice

0 commit comments

Comments
 (0)