
Commit 89440b1
DRA: integration tests for prioritized list
This adds dedicated integration tests for the feature to the general test/integration/dra suite: coverage for the API plus some minimal testing with the scheduler. It also adds non-performance test cases to scheduler_perf, because that is a better place for running through the complete flow (for example, it can reuse the infrastructure for setting up nodes).
1 parent dfb8ab6

File tree: 4 files changed, +167 -5 lines
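For readers unfamiliar with the feature: a prioritized-list request lists alternative subrequests under firstAvailable, and the allocator tries them in order, allocating the first one it can satisfy. Below is a minimal sketch of such a claim in Go, assuming the k8s.io/api/resource/v1beta1 types that the tests in this commit exercise; the class names are made up for illustration.

package main

import (
	resourceapi "k8s.io/api/resource/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// claim asks for one "premium-class" device when available and
// otherwise falls back to one "fallback-class" device. Subrequests
// are tried in the order listed; the first satisfiable one wins.
var claim = &resourceapi.ResourceClaim{
	ObjectMeta: metav1.ObjectMeta{Name: "my-claim", Namespace: "default"},
	Spec: resourceapi.ResourceClaimSpec{
		Devices: resourceapi.DeviceClaim{
			Requests: []resourceapi.DeviceRequest{{
				Name: "req-0",
				// FirstAvailable is only accepted by the apiserver while
				// the DRAPrioritizedList feature gate is enabled.
				FirstAvailable: []resourceapi.DeviceSubRequest{{
					Name:            "sub-0",
					DeviceClassName: "premium-class", // hypothetical class
					AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
					Count:           1,
				}, {
					Name:            "sub-1",
					DeviceClassName: "fallback-class", // hypothetical class
					AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
					Count:           1,
				}},
			}},
		},
	},
}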

staging/src/k8s.io/dynamic-resource-allocation/structured/allocator_test.go
Lines changed: 1 addition & 1 deletion

@@ -1923,7 +1923,7 @@ func TestAllocator(t *testing.T) {
 			node: node(node1, region1),
 
 			expectResults: nil,
-			expectError:   gomega.MatchError(gomega.ContainSubstring("claim claim-0, request req-0: has subrequests, but the feature is disabled")),
+			expectError:   gomega.MatchError(gomega.ContainSubstring("claim claim-0, request req-0: has subrequests, but the DRAPrioritizedList feature is disabled")),
 		},
 		"prioritized-list-multi-request": {
 			prioritizedList: true,

test/integration/dra/dra_test.go
Lines changed: 92 additions & 4 deletions

@@ -17,13 +17,19 @@ limitations under the License.
 package dra
 
 import (
+	"context"
+	"errors"
 	"fmt"
 	"regexp"
 	"sort"
 	"strings"
 	"testing"
+	"time"
 
+	"github.com/onsi/gomega"
+	"github.com/onsi/gomega/gstruct"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 
 	v1 "k8s.io/api/core/v1"
 	resourcealphaapi "k8s.io/api/resource/v1alpha3"
@@ -34,10 +40,15 @@ import (
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/component-base/featuregate"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
+	"k8s.io/klog/v2"
+	kubeschedulerconfigv1 "k8s.io/kube-scheduler/config/v1"
 	kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
 	"k8s.io/kubernetes/pkg/features"
+	"k8s.io/kubernetes/pkg/scheduler/apis/config"
+	kubeschedulerscheme "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme"
 	st "k8s.io/kubernetes/pkg/scheduler/testing"
 	"k8s.io/kubernetes/test/integration/framework"
+	"k8s.io/kubernetes/test/integration/util"
 	"k8s.io/kubernetes/test/utils/ktesting"
 	"k8s.io/utils/ptr"
 )
@@ -54,11 +65,21 @@
 		Container("my-container").
 		PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
 		Obj()
+	class = &resourceapi.DeviceClass{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: className,
+		},
+	}
 	claim = st.MakeResourceClaim().
 		Name(claimName).
 		Namespace(namespace).
 		Request(className).
 		Obj()
+	claimPrioritizedList = st.MakeResourceClaim().
+		Name(claimName).
+		Namespace(namespace).
+		RequestWithPrioritizedList(className).
+		Obj()
 )
 
 // createTestNamespace creates a namespace with a name that is derived from the
@@ -106,6 +127,7 @@ func TestDRA(t *testing.T) {
 			features: map[featuregate.Feature]bool{features.DynamicResourceAllocation: true},
 			f: func(tCtx ktesting.TContext) {
 				tCtx.Run("AdminAccess", func(tCtx ktesting.TContext) { testAdminAccess(tCtx, false) })
+				tCtx.Run("PrioritizedList", func(tCtx ktesting.TContext) { testPrioritizedList(tCtx, false) })
 				tCtx.Run("Pod", func(tCtx ktesting.TContext) { testPod(tCtx, true) })
 			},
 		},
@@ -119,11 +141,13 @@ func TestDRA(t *testing.T) {
 				// Additional DRA feature gates go here,
 				// in alphabetical order,
 				// as needed by tests for them.
-				features.DRAAdminAccess: true,
+				features.DRAAdminAccess:     true,
+				features.DRAPrioritizedList: true,
 			},
 			f: func(tCtx ktesting.TContext) {
 				tCtx.Run("AdminAccess", func(tCtx ktesting.TContext) { testAdminAccess(tCtx, true) })
 				tCtx.Run("Convert", testConvert)
+				tCtx.Run("PrioritizedList", func(tCtx ktesting.TContext) { testPrioritizedList(tCtx, true) })
 			},
 		},
 	} {
@@ -146,21 +170,43 @@ func TestDRA(t *testing.T) {
 			etcdOptions := framework.SharedEtcd()
 			apiServerOptions := kubeapiservertesting.NewDefaultTestServerOptions()
 			apiServerFlags := framework.DefaultTestServerFlags()
-			// Default kube-apiserver behavior, must be requested explicitly for test server.
-			runtimeConfigs := []string{"api/alpha=false", "api/beta=false"}
+			var runtimeConfigs []string
 			for key, value := range tc.apis {
 				runtimeConfigs = append(runtimeConfigs, fmt.Sprintf("%s=%t", key, value))
 			}
 			apiServerFlags = append(apiServerFlags, "--runtime-config="+strings.Join(runtimeConfigs, ","))
 			server := kubeapiservertesting.StartTestServerOrDie(t, apiServerOptions, apiServerFlags, etcdOptions)
 			tCtx.Cleanup(server.TearDownFn)
-
 			tCtx = ktesting.WithRESTConfig(tCtx, server.ClientConfig)
+
 			tc.f(tCtx)
 		})
 	}
 }
 
+func startScheduler(tCtx ktesting.TContext) {
+	// Run scheduler with default configuration.
+	tCtx.Log("Scheduler starting...")
+	schedulerCtx := klog.NewContext(tCtx, klog.LoggerWithName(tCtx.Logger(), "scheduler"))
+	schedulerCtx, cancel := context.WithCancelCause(schedulerCtx)
+	_, informerFactory := util.StartScheduler(schedulerCtx, tCtx.Client(), tCtx.RESTConfig(), newDefaultSchedulerComponentConfig(tCtx), nil)
+	// Stop clients of the apiserver before stopping the apiserver itself,
+	// otherwise it delays its shutdown.
+	tCtx.Cleanup(informerFactory.Shutdown)
+	tCtx.Cleanup(func() {
+		tCtx.Log("Stopping scheduler...")
+		cancel(errors.New("test is done"))
+	})
+}
+
+func newDefaultSchedulerComponentConfig(tCtx ktesting.TContext) *config.KubeSchedulerConfiguration {
+	gvk := kubeschedulerconfigv1.SchemeGroupVersion.WithKind("KubeSchedulerConfiguration")
+	cfg := config.KubeSchedulerConfiguration{}
+	_, _, err := kubeschedulerscheme.Codecs.UniversalDecoder().Decode(nil, &gvk, &cfg)
+	tCtx.ExpectNoError(err, "decode default scheduler configuration")
+	return &cfg
+}
+
 // testPod creates a pod with a resource claim reference and then checks
 // whether that field is or isn't getting dropped.
 func testPod(tCtx ktesting.TContext, draEnabled bool) {
@@ -220,3 +266,45 @@ func testAdminAccess(tCtx ktesting.TContext, adminAccessEnabled bool) {
 		}
 	}
 }
+
+func testPrioritizedList(tCtx ktesting.TContext, enabled bool) {
+	tCtx.Parallel()
+	_, err := tCtx.Client().ResourceV1beta1().DeviceClasses().Create(tCtx, class, metav1.CreateOptions{})
+	tCtx.ExpectNoError(err, "create class")
+	namespace := createTestNamespace(tCtx)
+	claim := claimPrioritizedList.DeepCopy()
+	claim.Namespace = namespace
+	claim, err = tCtx.Client().ResourceV1beta1().ResourceClaims(namespace).Create(tCtx, claim, metav1.CreateOptions{})
+
+	if !enabled {
+		require.Error(tCtx, err, "claim should have become invalid after dropping FirstAvailable")
+		return
+	}
+
+	require.NotEmpty(tCtx, claim.Spec.Devices.Requests[0].FirstAvailable, "should store FirstAvailable")
+	tCtx.Run("scheduler", func(tCtx ktesting.TContext) {
+		startScheduler(tCtx)
+
+		// The fake cluster configuration is not complete enough to actually schedule pods.
+		// That is covered in test/integration/scheduler_perf.
+		// Here we only test that we get to the point where the scheduler notices that,
+		// without failing during PreFilter because of FirstAvailable.
+		pod := podWithClaimName.DeepCopy()
+		pod.Namespace = namespace
+		_, err := tCtx.Client().CoreV1().Pods(namespace).Create(tCtx, pod, metav1.CreateOptions{})
+		tCtx.ExpectNoError(err, "create pod")
+		schedulingAttempted := gomega.HaveField("Status.Conditions", gomega.ContainElement(
+			gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
+				"Type":    gomega.Equal(v1.PodScheduled),
+				"Status":  gomega.Equal(v1.ConditionFalse),
+				"Reason":  gomega.Equal("Unschedulable"),
+				"Message": gomega.Equal("no nodes available to schedule pods"),
+			}),
+		))
+		ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) *v1.Pod {
+			pod, err := tCtx.Client().CoreV1().Pods(namespace).Get(tCtx, pod.Name, metav1.GetOptions{})
+			tCtx.ExpectNoError(err, "get pod")
+			return pod
+		}).WithTimeout(time.Minute).WithPolling(time.Second).Should(schedulingAttempted)
+	})
+}
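Aside: the asserted "no nodes available to schedule pods" message follows directly from the setup above. The test apiserver has no Node objects, so every scheduling attempt fails before any DRA filtering can happen; reaching that condition is enough to show that PreFilter accepted the FirstAvailable subrequests instead of rejecting the pod outright.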

test/integration/scheduler_perf/dra/performance-config.yaml
Lines changed: 60 additions & 0 deletions

@@ -294,6 +294,66 @@
     maxClaimsPerNode: 10
     duration: 10s
 
+# SteadyStateClusterResourceClaimTemplateFirstAvailable is a variant of SteadyStateClusterResourceClaimTemplate
+# with a claim template that uses "firstAvailable" subrequests, aka DRAPrioritizedList.
+- name: SteadyStateClusterResourceClaimTemplateFirstAvailable
+  featureGates:
+    DynamicResourceAllocation: true
+    DRAPrioritizedList: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $nodesWithoutDRA
+  - opcode: createNodes
+    nodeTemplatePath: templates/node-with-dra-test-driver.yaml
+    countParam: $nodesWithDRA
+  - opcode: createResourceDriver
+    driverName: test-driver.cdi.k8s.io
+    nodes: scheduler-perf-dra-*
+    maxClaimsPerNodeParam: $maxClaimsPerNode
+  - opcode: createAny
+    templatePath: templates/deviceclass.yaml
+  - opcode: createAny
+    templatePath: templates/resourceclaim.yaml
+    countParam: $initClaims
+    namespace: init
+  - opcode: allocResourceClaims
+    namespace: init
+  - opcode: createAny
+    templatePath: templates/resourceclaimtemplate-first-available.yaml
+    namespace: test
+  - opcode: createPods
+    namespace: test
+    count: 10
+    steadyState: true
+    durationParam: $duration
+    podTemplatePath: templates/pod-with-claim-template.yaml
+    collectMetrics: true
+  workloads:
+  - name: fast
+    featureGates:
+      SchedulerQueueingHints: false
+    labels: [integration-test, short]
+    params:
+      # This testcase runs through all code paths without
+      # taking too long overall.
+      nodesWithDRA: 1
+      nodesWithoutDRA: 1
+      initClaims: 0
+      maxClaimsPerNode: 10
+      duration: 2s
+  - name: fast_QueueingHintsEnabled
+    featureGates:
+      SchedulerQueueingHints: true
+    labels: [integration-test, short]
+    params:
+      # This testcase runs through all code paths without
+      # taking too long overall.
+      nodesWithDRA: 1
+      nodesWithoutDRA: 1
+      initClaims: 0
+      maxClaimsPerNode: 10
+      duration: 2s
+
 # SchedulingWithResourceClaimTemplate uses ResourceClaims
 # with deterministic names that are shared between pods.
 # There is a fixed ratio of 1:5 between claims and pods.
test/integration/scheduler_perf/dra/templates/resourceclaimtemplate-first-available.yaml
Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+apiVersion: resource.k8s.io/v1alpha3
+kind: ResourceClaimTemplate
+metadata:
+  name: test-claim-template
+spec:
+  spec:
+    devices:
+      requests:
+      - name: req-0
+        firstAvailable:
+        - name: sub-0
+          deviceClassName: no-such-class
+        - name: sub-1
+          deviceClassName: test-class
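The ordering in this template is what makes the scheduler_perf testcase exercise the fallback path: sub-0 references no-such-class, which can never be satisfied, so allocation has to fall through to sub-1 and test-class, presumably the class defined in templates/deviceclass.yaml.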
