Skip to content

Commit 2cc3dbf

Browse files
committed
DRA CEL: add missing size estimator
Not implementing a size estimator had the effect that strings retrieved from the attributes were treated as "unknown size", leading to wildly overestimating the cost and validation errors even for even simple expressions like this: device.attributes["qat.intel.com"].services.matches("[^a]?sym") Maximum number of elements in maps and the maximum length of the driver name string were also ignored resp. missing. Pre-defined types like apiservercel.StringType must be avoided because they are defined as having a zero maximum size.
1 parent f3cbd79 commit 2cc3dbf

File tree

6 files changed

+178
-32
lines changed

6 files changed

+178
-32
lines changed

pkg/apis/resource/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ type ResourceSliceSpec struct {
141141
Devices []Device
142142
}
143143

144+
// DriverNameMaxLength is the maximum valid length of a driver name in the
145+
// ResourceSliceSpec and other places. It's the same as for CSI driver names.
146+
const DriverNameMaxLength = 63
147+
144148
// ResourcePool describes the pool that ResourceSlices belong to.
145149
type ResourcePool struct {
146150
// Name is used to identify the pool. For node-local devices, this

pkg/apis/resource/validation/validation_resourceclaim_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,8 @@ func TestValidateClaim(t *testing.T) {
529529
claim.Spec.Devices.Requests[0].Selectors = []resource.DeviceSelector{
530530
{
531531
CEL: &resource.CELDeviceSelector{
532-
Expression: `device.attributes["dra.example.com"].map(s, s.lowerAscii()).map(s, s.size()).sum() == 0`,
532+
// From https://github.com/kubernetes/kubernetes/blob/50fc400f178d2078d0ca46aee955ee26375fc437/test/integration/apiserver/cel/validatingadmissionpolicy_test.go#L2150.
533+
Expression: `[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(x, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(y, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z2, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z3, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z4, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z5, int('1'.find('[0-9]*')) < 100)))))))`,
533534
},
534535
},
535536
}

staging/src/k8s.io/api/resource/v1alpha3/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ type ResourceSliceSpec struct {
145145
Devices []Device `json:"devices" protobuf:"bytes,6,name=devices"`
146146
}
147147

148+
// DriverNameMaxLength is the maximum valid length of a driver name in the
149+
// ResourceSliceSpec and other places. It's the same as for CSI driver names.
150+
const DriverNameMaxLength = 63
151+
148152
// ResourcePool describes the pool that ResourceSlices belong to.
149153
type ResourcePool struct {
150154
// Name is used to identify the pool. For node-local devices, this

staging/src/k8s.io/api/resource/v1beta1/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ type ResourceSliceSpec struct {
144144
Devices []Device `json:"devices" protobuf:"bytes,6,name=devices"`
145145
}
146146

147+
// DriverNameMaxLength is the maximum valid length of a driver name in the
148+
// ResourceSliceSpec and other places. It's the same as for CSI driver names.
149+
const DriverNameMaxLength = 63
150+
147151
// ResourcePool describes the pool that ResourceSlices belong to.
148152
type ResourcePool struct {
149153
// Name is used to identify the pool. For node-local devices, this

staging/src/k8s.io/dynamic-resource-allocation/cel/compile.go

Lines changed: 105 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/blang/semver/v4"
2828
"github.com/google/cel-go/cel"
29+
"github.com/google/cel-go/checker"
2930
"github.com/google/cel-go/common/types"
3031
"github.com/google/cel-go/common/types/ref"
3132
"github.com/google/cel-go/common/types/traits"
@@ -50,6 +51,23 @@ const (
5051
var (
5152
lazyCompilerInit sync.Once
5253
lazyCompiler *compiler
54+
55+
// A variant of AnyType = https://github.com/kubernetes/kubernetes/blob/ec2e0de35a298363872897e5904501b029817af3/staging/src/k8s.io/apiserver/pkg/cel/types.go#L550:
56+
// unknown actual type (could be bool, int, string, etc.) but with a known maximum size.
57+
attributeType = withMaxElements(apiservercel.AnyType, resourceapi.DeviceAttributeMaxValueLength)
58+
59+
// Other strings also have a known maximum size.
60+
domainType = withMaxElements(apiservercel.StringType, resourceapi.DeviceMaxDomainLength)
61+
idType = withMaxElements(apiservercel.StringType, resourceapi.DeviceMaxIDLength)
62+
driverType = withMaxElements(apiservercel.StringType, resourceapi.DriverNameMaxLength)
63+
64+
// Each map is bound by the maximum number of different attributes.
65+
innerAttributesMapType = apiservercel.NewMapType(idType, attributeType, resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice)
66+
outerAttributesMapType = apiservercel.NewMapType(domainType, innerAttributesMapType, resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice)
67+
68+
// Same for capacity.
69+
innerCapacityMapType = apiservercel.NewMapType(idType, apiservercel.QuantityDeclType, resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice)
70+
outerCapacityMapType = apiservercel.NewMapType(domainType, innerCapacityMapType, resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice)
5371
)
5472

5573
func GetCompiler() *compiler {
@@ -85,11 +103,12 @@ type Device struct {
85103
}
86104

87105
type compiler struct {
88-
envset *environment.EnvSet
89-
}
90-
91-
func newCompiler() *compiler {
92-
return &compiler{envset: mustBuildEnv()}
106+
// deviceType is a definition for the type of the `device` variable.
107+
// This is needed for the cost estimator. Both are currently version-independent.
108+
// If that ever changes, some additional logic might be needed to make
109+
// cost estimates version-dependent.
110+
deviceType *apiservercel.DeclType
111+
envset *environment.EnvSet
93112
}
94113

95114
// Options contains several additional parameters
@@ -124,7 +143,7 @@ func (c compiler) CompileCELExpression(expression string, options Options) Compi
124143

125144
// We don't have a SizeEstimator. The potential size of the input (= a
126145
// device) is already declared in the definition of the environment.
127-
estimator := &library.CostEstimator{}
146+
estimator := c.newCostEstimator()
128147

129148
ast, issues := env.Compile(expression)
130149
if issues != nil {
@@ -169,6 +188,10 @@ func (c compiler) CompileCELExpression(expression string, options Options) Compi
169188
return compilationResult
170189
}
171190

191+
func (c *compiler) newCostEstimator() *library.CostEstimator {
192+
return &library.CostEstimator{SizeEstimator: &sizeEstimator{compiler: c}}
193+
}
194+
172195
// getAttributeValue returns the native representation of the one value that
173196
// should be stored in the attribute, otherwise an error. An error is
174197
// also returned when there is no supported value.
@@ -241,7 +264,7 @@ func (c CompilationResult) DeviceMatches(ctx context.Context, input Device) (boo
241264
return resultBool, details, nil
242265
}
243266

244-
func mustBuildEnv() *environment.EnvSet {
267+
func newCompiler() *compiler {
245268
envset := environment.MustBaseEnvSet(environment.DefaultCompatibilityVersion(), true /* strictCost */)
246269
field := func(name string, declType *apiservercel.DeclType, required bool) *apiservercel.DeclField {
247270
return apiservercel.NewDeclField(name, declType, required, nil, nil)
@@ -253,10 +276,11 @@ func mustBuildEnv() *environment.EnvSet {
253276
}
254277
return result
255278
}
279+
256280
deviceType := apiservercel.NewObjectType("kubernetes.DRADevice", fields(
257-
field(driverVar, apiservercel.StringType, true),
258-
field(attributesVar, apiservercel.NewMapType(apiservercel.StringType, apiservercel.NewMapType(apiservercel.StringType, apiservercel.AnyType, resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice), resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice), true),
259-
field(capacityVar, apiservercel.NewMapType(apiservercel.StringType, apiservercel.NewMapType(apiservercel.StringType, apiservercel.QuantityDeclType, resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice), resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice), true),
281+
field(driverVar, driverType, true),
282+
field(attributesVar, outerAttributesMapType, true),
283+
field(capacityVar, outerCapacityMapType, true),
260284
))
261285

262286
versioned := []environment.VersionedOptions{
@@ -284,7 +308,13 @@ func mustBuildEnv() *environment.EnvSet {
284308
if err != nil {
285309
panic(fmt.Errorf("internal error building CEL environment: %w", err))
286310
}
287-
return envset
311+
return &compiler{envset: envset, deviceType: deviceType}
312+
}
313+
314+
func withMaxElements(in *apiservercel.DeclType, maxElements uint64) *apiservercel.DeclType {
315+
out := *in
316+
out.MaxElements = int64(maxElements)
317+
return &out
288318
}
289319

290320
// parseQualifiedName splits into domain and identified, using the default domain
@@ -322,3 +352,67 @@ func (m mapper) Find(key ref.Val) (ref.Val, bool) {
322352

323353
return m.defaultValue, true
324354
}
355+
356+
// sizeEstimator tells the cost estimator the maximum size of maps or strings accessible through the `device` variable.
357+
// Without this, the maximum string size of e.g. `device.attributes["dra.example.com"].services` would be unknown.
358+
//
359+
// sizeEstimator is derived from the sizeEstimator in k8s.io/apiextensions-apiserver/pkg/apiserver/schema/cel.
360+
type sizeEstimator struct {
361+
compiler *compiler
362+
}
363+
364+
func (s *sizeEstimator) EstimateSize(element checker.AstNode) *checker.SizeEstimate {
365+
path := element.Path()
366+
if len(path) == 0 {
367+
// Path() can return an empty list, early exit if it does since we can't
368+
// provide size estimates when that happens
369+
return nil
370+
}
371+
372+
// The estimator provides information about the environment's variable(s).
373+
var currentNode *apiservercel.DeclType
374+
switch path[0] {
375+
case deviceVar:
376+
currentNode = s.compiler.deviceType
377+
default:
378+
// Unknown root, shouldn't happen.
379+
return nil
380+
}
381+
382+
// Cut off initial variable from path, it was checked above.
383+
for _, name := range path[1:] {
384+
switch name {
385+
case "@items", "@values":
386+
if currentNode.ElemType == nil {
387+
return nil
388+
}
389+
currentNode = currentNode.ElemType
390+
case "@keys":
391+
if currentNode.KeyType == nil {
392+
return nil
393+
}
394+
currentNode = currentNode.KeyType
395+
default:
396+
field, ok := currentNode.Fields[name]
397+
if !ok {
398+
// If this is an attribute map, then we know that all elements
399+
// have the same maximum size as set in attributeType, regardless
400+
// of their name.
401+
if currentNode.ElemType == attributeType {
402+
currentNode = attributeType
403+
continue
404+
}
405+
return nil
406+
}
407+
if field.Type == nil {
408+
return nil
409+
}
410+
currentNode = field.Type
411+
}
412+
}
413+
return &checker.SizeEstimate{Min: 0, Max: uint64(currentNode.MaxElements)}
414+
}
415+
416+
func (s *sizeEstimator) EstimateCallCost(function, overloadID string, target *checker.AstNode, args []checker.AstNode) *checker.CallEstimate {
417+
return nil
418+
}

staging/src/k8s.io/dynamic-resource-allocation/cel/compile_test.go

Lines changed: 59 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,48 @@ device.attributes["dra.example.com"]["version"].isGreaterThan(semver("0.0.1"))
203203
expectMatch: true,
204204
expectCost: 12,
205205
},
206+
"check_attribute_domains": {
207+
expression: `device.attributes.exists_one(x, x == "dra.example.com")`,
208+
attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{"services": {StringValue: ptr.To("some_example_value")}},
209+
driver: "dra.example.com",
210+
expectMatch: true,
211+
expectCost: 164,
212+
},
213+
"check_attribute_ids": {
214+
expression: `device.attributes["dra.example.com"].exists_one(x, x == "services")`,
215+
attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{"services": {StringValue: ptr.To("some_example_value")}},
216+
driver: "dra.example.com",
217+
expectMatch: true,
218+
expectCost: 133,
219+
},
220+
"split_attribute": {
221+
expression: `device.attributes["dra.example.com"].services.split("example").size() >= 2`,
222+
attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{"services": {StringValue: ptr.To("some_example_value")}},
223+
driver: "dra.example.com",
224+
expectMatch: true,
225+
expectCost: 19,
226+
},
227+
"regexp_attribute": {
228+
expression: `device.attributes["dra.example.com"].services.matches("[^a]?sym")`,
229+
attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{"services": {StringValue: ptr.To("asymetric")}},
230+
driver: "dra.example.com",
231+
expectMatch: true,
232+
expectCost: 18,
233+
},
234+
"check_capacity_domains": {
235+
expression: `device.capacity.exists_one(x, x == "dra.example.com")`,
236+
capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{"memory": {Value: resource.MustParse("1Mi")}},
237+
driver: "dra.example.com",
238+
expectMatch: true,
239+
expectCost: 164,
240+
},
241+
"check_capacity_ids": {
242+
expression: `device.capacity["dra.example.com"].exists_one(x, x == "memory")`,
243+
capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{"memory": {Value: resource.MustParse("1Mi")}},
244+
driver: "dra.example.com",
245+
expectMatch: true,
246+
expectCost: 133,
247+
},
206248
"expensive": {
207249
// The worst-case is based on the maximum number of
208250
// attributes and the maximum attribute name length.
@@ -214,21 +256,18 @@ device.attributes["dra.example.com"]["version"].isGreaterThan(semver("0.0.1"))
214256
attribute := resourceapi.DeviceAttribute{
215257
StringValue: ptr.To("abc"),
216258
}
217-
// If the cost estimate was accurate, using exactly as many attributes
218-
// as allowed at most should exceed the limit. In practice, the estimate
219-
// is an upper bound and significantly more attributes are needed before
220-
// the runtime cost becomes too large.
221-
for i := 0; i < 1000*resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice; i++ {
259+
for i := 0; i < resourceapi.ResourceSliceMaxAttributesAndCapacitiesPerDevice; i++ {
222260
suffix := fmt.Sprintf("-%d", i)
223261
name := prefix + strings.Repeat("x", resourceapi.DeviceMaxIDLength-len(suffix)) + suffix
224262
attributes[resourceapi.QualifiedName(name)] = attribute
225263
}
226264
return attributes
227265
}(),
228-
expression: `device.attributes["dra.example.com"].map(s, s.lowerAscii()).map(s, s.size()).sum() == 0`,
266+
// From https://github.com/kubernetes/kubernetes/blob/50fc400f178d2078d0ca46aee955ee26375fc437/test/integration/apiserver/cel/validatingadmissionpolicy_test.go#L2150.
267+
expression: `[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(x, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(y, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z2, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z3, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z4, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z5, int('1'.find('[0-9]*')) < 100)))))))`,
229268
driver: "dra.example.com",
230269
expectMatchError: "actual cost limit exceeded",
231-
expectCost: 18446744073709551615, // Exceeds limit!
270+
expectCost: 85555551, // Exceed limit!
232271
},
233272
}
234273

@@ -238,50 +277,50 @@ func TestCEL(t *testing.T) {
238277
_, ctx := ktesting.NewTestContext(t)
239278
result := GetCompiler().CompileCELExpression(scenario.expression, Options{})
240279
if scenario.expectCompileError != "" && result.Error == nil {
241-
t.Fatalf("expected compile error %q, got none", scenario.expectCompileError)
280+
t.Fatalf("FAILURE: expected compile error %q, got none", scenario.expectCompileError)
242281
}
243282
if result.Error != nil {
244283
if scenario.expectCompileError == "" {
245-
t.Fatalf("unexpected compile error: %v", result.Error)
284+
t.Fatalf("FAILURE: unexpected compile error: %v", result.Error)
246285
}
247286
if !strings.Contains(result.Error.Error(), scenario.expectCompileError) {
248-
t.Fatalf("expected compile error to contain %q, but got instead: %v", scenario.expectCompileError, result.Error)
287+
t.Fatalf("FAILURE: expected compile error to contain %q, but got instead: %v", scenario.expectCompileError, result.Error)
249288
}
250289
return
251290
}
252291
if scenario.expectCompileError != "" {
253-
t.Fatalf("expected compile error %q, got none", scenario.expectCompileError)
292+
t.Fatalf("FAILURE: expected compile error %q, got none", scenario.expectCompileError)
254293
}
255294
if expect, actual := scenario.expectCost, result.MaxCost; expect != actual {
256-
t.Errorf("expected CEL cost %d, got %d instead", expect, actual)
295+
t.Errorf("ERROR: expected CEL cost %d, got %d instead (%.0f%% of limit %d)", expect, actual, float64(actual)*100.0/float64(resourceapi.CELSelectorExpressionMaxCost), resourceapi.CELSelectorExpressionMaxCost)
257296
}
258297

259298
match, details, err := result.DeviceMatches(ctx, Device{Attributes: scenario.attributes, Capacity: scenario.capacity, Driver: scenario.driver})
260299
// details.ActualCost can be called for nil details, no need to check.
261300
actualCost := ptr.Deref(details.ActualCost(), 0)
262301
if scenario.expectCost > 0 {
263-
t.Logf("actual cost %d, %d%% of worst-case estimate", actualCost, actualCost*100/scenario.expectCost)
302+
t.Logf("actual cost %d, %d%% of worst-case estimate %d", actualCost, actualCost*100/scenario.expectCost, scenario.expectCost)
264303
} else {
265304
t.Logf("actual cost %d, expected zero costs", actualCost)
266-
if actualCost > 0 {
267-
t.Errorf("expected zero costs for (presumably) constant expression %q, got instead %d", scenario.expression, actualCost)
268-
}
305+
}
306+
if actualCost > result.MaxCost {
307+
t.Errorf("ERROR: cost estimate %d underestimated the evaluation cost of %d", result.MaxCost, actualCost)
269308
}
270309

271310
if err != nil {
272311
if scenario.expectMatchError == "" {
273-
t.Fatalf("unexpected evaluation error: %v", err)
312+
t.Fatalf("FAILURE: unexpected evaluation error: %v", err)
274313
}
275314
if !strings.Contains(err.Error(), scenario.expectMatchError) {
276-
t.Fatalf("expected evaluation error to contain %q, but got instead: %v", scenario.expectMatchError, err)
315+
t.Fatalf("FAILURE: expected evaluation error to contain %q, but got instead: %v", scenario.expectMatchError, err)
277316
}
278317
return
279318
}
280319
if scenario.expectMatchError != "" {
281-
t.Fatalf("expected match error %q, got none", scenario.expectMatchError)
320+
t.Fatalf("FAILURE: expected match error %q, got none", scenario.expectMatchError)
282321
}
283322
if match != scenario.expectMatch {
284-
t.Fatalf("expected result %v, got %v", scenario.expectMatch, match)
323+
t.Fatalf("FAILURE: expected result %v, got %v", scenario.expectMatch, match)
285324
}
286325
})
287326
}

0 commit comments

Comments
 (0)