Skip to content

Commit 9a7e4cc

Browse files
committed
DRA admin access: add feature gate
The new DRAAdminAccess feature gate has the following effects: - If disabled in the apiserver, the spec.devices.requests[*].adminAccess field gets cleared. Same in the status. In both cases the scenario that it was already set and a claim or claim template get updated is special: in those cases, the field is not cleared. Also, allocating a claim with admin access is allowed regardless of the feature gate and the field is not cleared. In practice, the scheduler will not do that. - If disabled in the resource claim controller, creating ResourceClaims with the field set gets rejected. This prevents running workloads which depend on admin access. - If disabled in the scheduler, claims with admin access don't get allocated. The effect is the same. The alternative would have been to ignore the fields in claim controller and scheduler. This is bad because a monitoring workload then runs, blocking resources that probably were meant for production workloads.
1 parent f3fef01 commit 9a7e4cc

File tree

29 files changed

+564
-56
lines changed

29 files changed

+564
-56
lines changed

api/openapi-spec/swagger.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/openapi-spec/v3/apis__resource.k8s.io__v1alpha3_openapi.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/kube-controller-manager/app/core.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ func newResourceClaimControllerDescriptor() *ControllerDescriptor {
407407
func startResourceClaimController(ctx context.Context, controllerContext ControllerContext, controllerName string) (controller.Interface, bool, error) {
408408
ephemeralController, err := resourceclaim.NewController(
409409
klog.FromContext(ctx),
410+
utilfeature.DefaultFeatureGate.Enabled(features.DRAAdminAccess),
410411
controllerContext.ClientBuilder.ClientOrDie("resource-claim-controller"),
411412
controllerContext.InformerFactory.Core().V1().Pods(),
412413
controllerContext.InformerFactory.Resource().V1alpha3().ResourceClaims(),

pkg/apis/resource/types.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,8 +442,12 @@ type DeviceRequest struct {
442442
// all ordinary claims to the device with respect to access modes and
443443
// any resource allocations.
444444
//
445+
// This is an alpha field and requires enabling the DRAAdminAccess
446+
// feature gate.
447+
//
445448
// +optional
446449
// +default=false
450+
// +featureGate=DRAAdminAccess
447451
AdminAccess bool
448452
}
449453

@@ -786,12 +790,17 @@ type DeviceRequestAllocationResult struct {
786790
// AdminAccess is a copy of the AdminAccess value in the
787791
// request which caused this device to be allocated.
788792
//
789-
// New allocations are required to have this set. Old allocations made
793+
// New allocations are required to have this set when the DRAAdminAccess
794+
// feature gate is enabled. Old allocations made
790795
// by Kubernetes 1.31 do not have it yet. Clients which want to
791796
// support Kubernetes 1.31 need to look up the request and retrieve
792797
// the value from there if this field is not set.
793798
//
799+
// This is an alpha field and requires enabling the DRAAdminAccess
800+
// feature gate.
801+
//
794802
// +required
803+
// +featureGate=DRAAdminAccess
795804
AdminAccess *bool
796805
}
797806

pkg/apis/resource/validation/validation.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@ import (
3232
"k8s.io/apimachinery/pkg/util/validation/field"
3333
"k8s.io/apiserver/pkg/cel"
3434
"k8s.io/apiserver/pkg/cel/environment"
35+
utilfeature "k8s.io/apiserver/pkg/util/feature"
3536
dracel "k8s.io/dynamic-resource-allocation/cel"
3637
corevalidation "k8s.io/kubernetes/pkg/apis/core/validation"
3738
"k8s.io/kubernetes/pkg/apis/resource"
39+
"k8s.io/kubernetes/pkg/features"
3840
)
3941

4042
var (
@@ -343,7 +345,7 @@ func validateDeviceRequestAllocationResult(result resource.DeviceRequestAllocati
343345
allErrs = append(allErrs, validateDriverName(result.Driver, fldPath.Child("driver"))...)
344346
allErrs = append(allErrs, validatePoolName(result.Pool, fldPath.Child("pool"))...)
345347
allErrs = append(allErrs, validateDeviceName(result.Device, fldPath.Child("device"))...)
346-
if result.AdminAccess == nil {
348+
if result.AdminAccess == nil && utilfeature.DefaultFeatureGate.Enabled(features.DRAAdminAccess) {
347349
allErrs = append(allErrs, field.Required(fldPath.Child("adminAccess"), ""))
348350
}
349351
return allErrs

pkg/apis/resource/validation/validation_resourceclaim_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,11 @@ import (
2727
"k8s.io/apimachinery/pkg/runtime"
2828
"k8s.io/apimachinery/pkg/types"
2929
"k8s.io/apimachinery/pkg/util/validation/field"
30+
utilfeature "k8s.io/apiserver/pkg/util/feature"
31+
featuregatetesting "k8s.io/component-base/featuregate/testing"
3032
"k8s.io/kubernetes/pkg/apis/core"
3133
"k8s.io/kubernetes/pkg/apis/resource"
34+
"k8s.io/kubernetes/pkg/features"
3235
"k8s.io/utils/pointer"
3336
"k8s.io/utils/ptr"
3437
)
@@ -421,6 +424,7 @@ func TestValidateClaimStatusUpdate(t *testing.T) {
421424
validAllocatedClaimOld.Status.Allocation.Devices.Results[0].AdminAccess = nil // Not required in 1.31.
422425

423426
scenarios := map[string]struct {
427+
adminAccess bool
424428
oldClaim *resource.ResourceClaim
425429
update func(claim *resource.ResourceClaim) *resource.ResourceClaim
426430
wantFailures field.ErrorList
@@ -475,6 +479,7 @@ func TestValidateClaimStatusUpdate(t *testing.T) {
475479
},
476480
},
477481
"invalid-add-allocation-missing-admin-access": {
482+
adminAccess: true,
478483
wantFailures: field.ErrorList{
479484
field.Required(field.NewPath("status", "allocation", "devices", "results").Index(0).Child("adminAccess"), ""),
480485
},
@@ -494,6 +499,24 @@ func TestValidateClaimStatusUpdate(t *testing.T) {
494499
return claim
495500
},
496501
},
502+
"okay-add-allocation-missing-admin-access": {
503+
adminAccess: false,
504+
oldClaim: validClaim,
505+
update: func(claim *resource.ResourceClaim) *resource.ResourceClaim {
506+
claim.Status.Allocation = &resource.AllocationResult{
507+
Devices: resource.DeviceAllocationResult{
508+
Results: []resource.DeviceRequestAllocationResult{{
509+
Request: goodName,
510+
Driver: goodName,
511+
Pool: goodName,
512+
Device: goodName,
513+
AdminAccess: nil, // Intentionally not set.
514+
}},
515+
},
516+
}
517+
return claim
518+
},
519+
},
497520
"invalid-node-selector": {
498521
wantFailures: field.ErrorList{field.Required(field.NewPath("status", "allocation", "nodeSelector", "nodeSelectorTerms"), "must have at least one node selector term")},
499522
oldClaim: validClaim,
@@ -691,6 +714,7 @@ func TestValidateClaimStatusUpdate(t *testing.T) {
691714

692715
for name, scenario := range scenarios {
693716
t.Run(name, func(t *testing.T) {
717+
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAAdminAccess, scenario.adminAccess)
694718
scenario.oldClaim.ResourceVersion = "1"
695719
errs := ValidateResourceClaimStatusUpdate(scenario.update(scenario.oldClaim.DeepCopy()), scenario.oldClaim)
696720
assert.Equal(t, scenario.wantFailures, errs)

pkg/controller/resourceclaim/controller.go

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ const (
7171

7272
// Controller creates ResourceClaims for ResourceClaimTemplates in a pod spec.
7373
type Controller struct {
74+
// adminAccessEnabled matches the DRAAdminAccess feature gate state.
75+
adminAccessEnabled bool
76+
7477
// kubeClient is the kube API client used to communicate with the API
7578
// server.
7679
kubeClient clientset.Interface
@@ -118,20 +121,22 @@ const (
118121
// NewController creates a ResourceClaim controller.
119122
func NewController(
120123
logger klog.Logger,
124+
adminAccessEnabled bool,
121125
kubeClient clientset.Interface,
122126
podInformer v1informers.PodInformer,
123127
claimInformer resourceinformers.ResourceClaimInformer,
124128
templateInformer resourceinformers.ResourceClaimTemplateInformer) (*Controller, error) {
125129

126130
ec := &Controller{
127-
kubeClient: kubeClient,
128-
podLister: podInformer.Lister(),
129-
podIndexer: podInformer.Informer().GetIndexer(),
130-
podSynced: podInformer.Informer().HasSynced,
131-
claimLister: claimInformer.Lister(),
132-
claimsSynced: claimInformer.Informer().HasSynced,
133-
templateLister: templateInformer.Lister(),
134-
templatesSynced: templateInformer.Informer().HasSynced,
131+
adminAccessEnabled: adminAccessEnabled,
132+
kubeClient: kubeClient,
133+
podLister: podInformer.Lister(),
134+
podIndexer: podInformer.Informer().GetIndexer(),
135+
podSynced: podInformer.Informer().HasSynced,
136+
claimLister: claimInformer.Lister(),
137+
claimsSynced: claimInformer.Informer().HasSynced,
138+
templateLister: templateInformer.Lister(),
139+
templatesSynced: templateInformer.Informer().HasSynced,
135140
queue: workqueue.NewTypedRateLimitingQueueWithConfig(
136141
workqueue.DefaultTypedControllerRateLimiter[string](),
137142
workqueue.TypedRateLimitingQueueConfig[string]{Name: "resource_claim"},
@@ -612,6 +617,10 @@ func (ec *Controller) handleClaim(ctx context.Context, pod *v1.Pod, podClaim v1.
612617
return fmt.Errorf("resource claim template %q: %v", *templateName, err)
613618
}
614619

620+
if !ec.adminAccessEnabled && needsAdminAccess(template) {
621+
return errors.New("admin access is requested, but the feature is disabled")
622+
}
623+
615624
// Create the ResourceClaim with pod as owner, with a generated name that uses
616625
// <pod>-<claim name> as base.
617626
isTrue := true
@@ -670,6 +679,15 @@ func (ec *Controller) handleClaim(ctx context.Context, pod *v1.Pod, podClaim v1.
670679
return nil
671680
}
672681

682+
func needsAdminAccess(claimTemplate *resourceapi.ResourceClaimTemplate) bool {
683+
for _, request := range claimTemplate.Spec.Spec.Devices.Requests {
684+
if request.AdminAccess {
685+
return true
686+
}
687+
}
688+
return false
689+
}
690+
673691
// findPodResourceClaim looks for an existing ResourceClaim with the right
674692
// annotation (ties it to the pod claim) and the right ownership (ties it to
675693
// the pod).

pkg/controller/resourceclaim/controller_test.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ import (
3737
"k8s.io/client-go/kubernetes/fake"
3838
k8stesting "k8s.io/client-go/testing"
3939
"k8s.io/component-base/metrics/testutil"
40-
"k8s.io/klog/v2"
4140
"k8s.io/kubernetes/pkg/controller"
4241
"k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
4342
"k8s.io/kubernetes/test/utils/ktesting"
@@ -83,17 +82,18 @@ var (
8382

8483
func TestSyncHandler(t *testing.T) {
8584
tests := []struct {
86-
name string
87-
key string
88-
claims []*resourceapi.ResourceClaim
89-
claimsInCache []*resourceapi.ResourceClaim
90-
pods []*v1.Pod
91-
podsLater []*v1.Pod
92-
templates []*resourceapi.ResourceClaimTemplate
93-
expectedClaims []resourceapi.ResourceClaim
94-
expectedStatuses map[string][]v1.PodResourceClaimStatus
95-
expectedError bool
96-
expectedMetrics expectedMetrics
85+
name string
86+
key string
87+
adminAccessEnabled bool
88+
claims []*resourceapi.ResourceClaim
89+
claimsInCache []*resourceapi.ResourceClaim
90+
pods []*v1.Pod
91+
podsLater []*v1.Pod
92+
templates []*resourceapi.ResourceClaimTemplate
93+
expectedClaims []resourceapi.ResourceClaim
94+
expectedStatuses map[string][]v1.PodResourceClaimStatus
95+
expectedError bool
96+
expectedMetrics expectedMetrics
9797
}{
9898
{
9999
name: "create",
@@ -390,7 +390,7 @@ func TestSyncHandler(t *testing.T) {
390390
claimInformer := informerFactory.Resource().V1alpha3().ResourceClaims()
391391
templateInformer := informerFactory.Resource().V1alpha3().ResourceClaimTemplates()
392392

393-
ec, err := NewController(klog.FromContext(tCtx), fakeKubeClient, podInformer, claimInformer, templateInformer)
393+
ec, err := NewController(tCtx.Logger(), tc.adminAccessEnabled, fakeKubeClient, podInformer, claimInformer, templateInformer)
394394
if err != nil {
395395
t.Fatalf("error creating ephemeral controller : %v", err)
396396
}
@@ -465,7 +465,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
465465
templateInformer := informerFactory.Resource().V1alpha3().ResourceClaimTemplates()
466466
claimClient := fakeKubeClient.ResourceV1alpha3().ResourceClaims(testNamespace)
467467

468-
_, err := NewController(tCtx.Logger(), fakeKubeClient, podInformer, claimInformer, templateInformer)
468+
_, err := NewController(tCtx.Logger(), false /* admin access */, fakeKubeClient, podInformer, claimInformer, templateInformer)
469469
tCtx.ExpectNoError(err, "creating ephemeral controller")
470470

471471
informerFactory.Start(tCtx.Done())

pkg/controlplane/apiserver/options/validation.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ func validateNodeSelectorAuthorizationFeature() []error {
7777
return nil
7878
}
7979

80+
func validateDRAAdminAccessFeature() []error {
81+
if utilfeature.DefaultFeatureGate.Enabled(features.DRAAdminAccess) && !utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
82+
return []error{fmt.Errorf("DRAAdminAccess feature requires DynamicResourceAllocation feature to be enabled")}
83+
}
84+
return nil
85+
}
86+
8087
func validateUnknownVersionInteroperabilityProxyFeature() []error {
8188
if utilfeature.DefaultFeatureGate.Enabled(features.UnknownVersionInteroperabilityProxy) {
8289
if utilfeature.DefaultFeatureGate.Enabled(genericfeatures.StorageVersionAPI) {
@@ -121,6 +128,7 @@ func (s *Options) Validate() []error {
121128
errs = append(errs, validateUnknownVersionInteroperabilityProxyFeature()...)
122129
errs = append(errs, validateUnknownVersionInteroperabilityProxyFlags(s)...)
123130
errs = append(errs, validateNodeSelectorAuthorizationFeature()...)
131+
errs = append(errs, validateDRAAdminAccessFeature()...)
124132

125133
return errs
126134
}

pkg/features/kube_features.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,18 @@ const (
203203
// DisableNodeKubeProxyVersion disable the status.nodeInfo.kubeProxyVersion field of v1.Node
204204
DisableNodeKubeProxyVersion featuregate.Feature = "DisableNodeKubeProxyVersion"
205205

206+
// owner: @pohly
207+
// kep: http://kep.k8s.io/4381
208+
//
209+
// Enables support for requesting admin access in a ResourceClaim.
210+
// Admin access is granted even if a device is already in use and,
211+
// depending on the DRA driver, may enable additional permissions
212+
// when a container uses the allocated device.
213+
//
214+
// This feature gate is currently defined in KEP #4381. The intent
215+
// is to move it into a separate KEP.
216+
DRAAdminAccess featuregate.Feature = "DRAAdminAccess"
217+
206218
// owner: @pohly
207219
// kep: http://kep.k8s.io/4381
208220
//

0 commit comments

Comments
 (0)