Skip to content

Commit 3690cb7

Browse files
authored
Merge pull request kubernetes#128101 from pohly/dra-api-cel-cost-limit
DRA API: implement CEL cost limit
2 parents 25d6f76 + d53cb79 commit 3690cb7

File tree

12 files changed

+432
-163
lines changed

12 files changed

+432
-163
lines changed

api/openapi-spec/swagger.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/openapi-spec/v3/apis__resource.k8s.io__v1alpha3_openapi.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@
139139
"properties": {
140140
"expression": {
141141
"default": "",
142-
"description": "Expression is a CEL expression which evaluates a single device. It must evaluate to true when the device under consideration satisfies the desired criteria, and false when it does not. Any other result is an error and causes allocation of devices to abort.\n\nThe expression's input is an object named \"device\", which carries the following properties:\n - driver (string): the name of the driver which defines this device.\n - attributes (map[string]object): the device's attributes, grouped by prefix\n (e.g. device.attributes[\"dra.example.com\"] evaluates to an object with all\n of the attributes which were prefixed by \"dra.example.com\".\n - capacity (map[string]object): the device's capacities, grouped by prefix.\n\nExample: Consider a device with driver=\"dra.example.com\", which exposes two attributes named \"model\" and \"ext.example.com/family\" and which exposes one capacity named \"modules\". This input to this expression would have the following fields:\n\n device.driver\n device.attributes[\"dra.example.com\"].model\n device.attributes[\"ext.example.com\"].family\n device.capacity[\"dra.example.com\"].modules\n\nThe device.driver field can be used to check for a specific driver, either as a high-level precondition (i.e. you only want to consider devices from this driver) or as part of a multi-clause expression that is meant to consider devices from different drivers.\n\nThe value type of each attribute is defined by the device definition, and users who write these expressions must consult the documentation for their specific drivers. The value type of each capacity is Quantity.\n\nIf an unknown prefix is used as a lookup in either device.attributes or device.capacity, an empty map will be returned. Any reference to an unknown field will cause an evaluation error and allocation to abort.\n\nA robust expression should check for the existence of attributes before referencing them.\n\nFor ease of use, the cel.bind() function is enabled, and can be used to simplify expressions that access multiple attributes with the same domain. For example:\n\n cel.bind(dra, device.attributes[\"dra.example.com\"], dra.someBool && dra.anotherBool)",
142+
"description": "Expression is a CEL expression which evaluates a single device. It must evaluate to true when the device under consideration satisfies the desired criteria, and false when it does not. Any other result is an error and causes allocation of devices to abort.\n\nThe expression's input is an object named \"device\", which carries the following properties:\n - driver (string): the name of the driver which defines this device.\n - attributes (map[string]object): the device's attributes, grouped by prefix\n (e.g. device.attributes[\"dra.example.com\"] evaluates to an object with all\n of the attributes which were prefixed by \"dra.example.com\".\n - capacity (map[string]object): the device's capacities, grouped by prefix.\n\nExample: Consider a device with driver=\"dra.example.com\", which exposes two attributes named \"model\" and \"ext.example.com/family\" and which exposes one capacity named \"modules\". This input to this expression would have the following fields:\n\n device.driver\n device.attributes[\"dra.example.com\"].model\n device.attributes[\"ext.example.com\"].family\n device.capacity[\"dra.example.com\"].modules\n\nThe device.driver field can be used to check for a specific driver, either as a high-level precondition (i.e. you only want to consider devices from this driver) or as part of a multi-clause expression that is meant to consider devices from different drivers.\n\nThe value type of each attribute is defined by the device definition, and users who write these expressions must consult the documentation for their specific drivers. The value type of each capacity is Quantity.\n\nIf an unknown prefix is used as a lookup in either device.attributes or device.capacity, an empty map will be returned. Any reference to an unknown field will cause an evaluation error and allocation to abort.\n\nA robust expression should check for the existence of attributes before referencing them.\n\nFor ease of use, the cel.bind() function is enabled, and can be used to simplify expressions that access multiple attributes with the same domain. For example:\n\n cel.bind(dra, device.attributes[\"dra.example.com\"], dra.someBool && dra.anotherBool)\n\nThe length of the expression must be smaller or equal to 10 Ki. The cost of evaluating it is also limited based on the estimated number of logical steps.",
143143
"type": "string"
144144
}
145145
},

pkg/apis/resource/types.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,10 +517,42 @@ type CELDeviceSelector struct {
517517
//
518518
// cel.bind(dra, device.attributes["dra.example.com"], dra.someBool && dra.anotherBool)
519519
//
520+
// The length of the expression must be smaller or equal to 10 Ki. The
521+
// cost of evaluating it is also limited based on the estimated number
522+
// of logical steps.
523+
//
520524
// +required
521525
Expression string
522526
}
523527

528+
// CELSelectorExpressionMaxCost specifies the cost limit for a single CEL selector
529+
// evaluation.
530+
//
531+
// There is no overall budget for selecting a device, so the actual time
532+
// required for that is proportional to the number of CEL selectors and how
533+
// often they need to be evaluated, which can vary depending on several factors
534+
// (number of devices, cluster utilization, additional constraints).
535+
//
536+
// Validation against this limit and [CELSelectorExpressionMaxLength] happens
537+
// only when setting an expression for the first time or when changing it. If
538+
// the limits are changed in a future Kubernetes release, existing users are
539+
// guaranteed that existing expressions will continue to be valid.
540+
//
541+
// However, the kube-scheduler also applies this cost limit at runtime, so it
542+
// could happen that a valid expression fails at runtime after an up- or
543+
// downgrade. This can also happen without version skew when the cost estimate
544+
// underestimated the actual cost. That this might happen is the reason why
545+
// kube-scheduler enforces the runtime limit instead of relying on validation.
546+
//
547+
// According to
548+
// https://github.com/kubernetes/kubernetes/blob/4aeaf1e99e82da8334c0d6dddd848a194cd44b4f/staging/src/k8s.io/apiserver/pkg/apis/cel/config.go#L20-L22,
549+
// this gives roughly 0.1 second for each expression evaluation.
550+
// However, this depends on how fast the machine is.
551+
const CELSelectorExpressionMaxCost = 1000000
552+
553+
// CELSelectorExpressionMaxLength is the maximum length of a CEL selector expression string.
554+
const CELSelectorExpressionMaxLength = 10 * 1024
555+
524556
// DeviceConstraint must have exactly one field set besides Requests.
525557
type DeviceConstraint struct {
526558
// Requests is a list of the one or more requests in this claim which

pkg/apis/resource/validation/validation.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,19 @@ func validateCELSelector(celSelector resource.CELDeviceSelector, fldPath *field.
167167
if stored {
168168
envType = environment.StoredExpressions
169169
}
170-
result := dracel.GetCompiler().CompileCELExpression(celSelector.Expression, envType)
170+
if len(celSelector.Expression) > resource.CELSelectorExpressionMaxLength {
171+
allErrs = append(allErrs, field.TooLongMaxLength(fldPath.Child("expression"), "<value omitted>", resource.CELSelectorExpressionMaxLength))
172+
// Don't bother compiling too long expressions.
173+
return allErrs
174+
}
175+
176+
result := dracel.GetCompiler().CompileCELExpression(celSelector.Expression, dracel.Options{EnvType: &envType})
171177
if result.Error != nil {
172178
allErrs = append(allErrs, convertCELErrorToValidationError(fldPath.Child("expression"), celSelector.Expression, result.Error))
179+
} else if result.MaxCost > resource.CELSelectorExpressionMaxCost {
180+
allErrs = append(allErrs, field.Forbidden(fldPath.Child("expression"), "too complex, exceeds cost limit"))
173181
}
182+
174183
return allErrs
175184
}
176185

pkg/apis/resource/validation/validation_resourceclaim_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package validation
1818

1919
import (
2020
"fmt"
21+
"strings"
2122
"testing"
2223

2324
"github.com/stretchr/testify/assert"
@@ -316,6 +317,48 @@ func TestValidateClaim(t *testing.T) {
316317
return claim
317318
}(),
318319
},
320+
"CEL-length": {
321+
wantFailures: field.ErrorList{
322+
field.TooLongMaxLength(field.NewPath("spec", "devices", "requests").Index(1).Child("selectors").Index(1).Child("cel", "expression"), "<value omitted>", resource.CELSelectorExpressionMaxLength),
323+
},
324+
claim: func() *resource.ResourceClaim {
325+
claim := testClaim(goodName, goodNS, validClaimSpec)
326+
claim.Spec.Devices.Requests = append(claim.Spec.Devices.Requests, claim.Spec.Devices.Requests[0])
327+
claim.Spec.Devices.Requests[1].Name += "-2"
328+
expression := `device.driver == ""`
329+
claim.Spec.Devices.Requests[1].Selectors = []resource.DeviceSelector{
330+
{
331+
// Good selector.
332+
CEL: &resource.CELDeviceSelector{
333+
Expression: strings.ReplaceAll(expression, `""`, `"`+strings.Repeat("x", resource.CELSelectorExpressionMaxLength-len(expression))+`"`),
334+
},
335+
},
336+
{
337+
// Too long by one selector.
338+
CEL: &resource.CELDeviceSelector{
339+
Expression: strings.ReplaceAll(expression, `""`, `"`+strings.Repeat("x", resource.CELSelectorExpressionMaxLength-len(expression)+1)+`"`),
340+
},
341+
},
342+
}
343+
return claim
344+
}(),
345+
},
346+
"CEL-cost": {
347+
wantFailures: field.ErrorList{
348+
field.Forbidden(field.NewPath("spec", "devices", "requests").Index(0).Child("selectors").Index(0).Child("cel", "expression"), "too complex, exceeds cost limit"),
349+
},
350+
claim: func() *resource.ResourceClaim {
351+
claim := testClaim(goodName, goodNS, validClaimSpec)
352+
claim.Spec.Devices.Requests[0].Selectors = []resource.DeviceSelector{
353+
{
354+
CEL: &resource.CELDeviceSelector{
355+
Expression: `device.attributes["dra.example.com"].map(s, s.lowerAscii()).map(s, s.size()).sum() == 0`,
356+
},
357+
},
358+
}
359+
return claim
360+
}(),
361+
},
319362
}
320363

321364
for name, scenario := range scenarios {

pkg/generated/openapi/zz_generated.openapi.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

staging/src/k8s.io/api/resource/v1alpha3/generated.proto

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)