Skip to content

Commit 2499663

Browse files
committed
DRA E2E: tests for device taints
1 parent 5760a4f commit 2499663

File tree

4 files changed

+113
-3
lines changed

4 files changed

+113
-3
lines changed

test/e2e/dra/deploy.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ type Resources struct {
8787

8888
// Number of devices called "device-000", "device-001", ... on each node or in the cluster.
8989
MaxAllocations int
90+
91+
// Tainted causes all devices to be published with a NoSchedule taint.
92+
Tainted bool
9093
}
9194

9295
//go:embed test-driver/deploy/example/plugin-permissions.yaml
@@ -299,10 +302,18 @@ func (d *Driver) SetUp(nodes *Nodes, resources Resources, devicesPerNode ...map[
299302
maxAllocations = 10
300303
}
301304
for i := 0; i < maxAllocations; i++ {
302-
slice.Spec.Devices = append(slice.Spec.Devices, resourceapi.Device{
305+
device := resourceapi.Device{
303306
Name: fmt.Sprintf("device-%d", i),
304307
Basic: &resourceapi.BasicDevice{},
305-
})
308+
}
309+
if resources.Tainted {
310+
device.Basic.Taints = []resourceapi.DeviceTaint{{
311+
Key: "example.com/taint",
312+
Value: "tainted",
313+
Effect: resourceapi.DeviceTaintEffectNoSchedule,
314+
}}
315+
}
316+
slice.Spec.Devices = append(slice.Spec.Devices, device)
306317
}
307318

308319
_, err := d.f.ClientSet.ResourceV1beta1().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})

test/e2e/dra/dra.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
3636
appsv1 "k8s.io/api/apps/v1"
3737
v1 "k8s.io/api/core/v1"
38+
resourcealphaapi "k8s.io/api/resource/v1alpha3"
3839
resourceapi "k8s.io/api/resource/v1beta1"
3940
apierrors "k8s.io/apimachinery/pkg/api/errors"
4041
"k8s.io/apimachinery/pkg/api/resource"
@@ -1280,6 +1281,88 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
12801281
prioritizedListTests()
12811282
})
12821283

1284+
framework.Context("with device taints", feature.DRADeviceTaints, framework.WithFeatureGate(features.DRADeviceTaints), func() {
1285+
nodes := NewNodes(f, 1, 1)
1286+
driver := NewDriver(f, nodes, func() Resources {
1287+
return Resources{
1288+
Tainted: true,
1289+
}
1290+
})
1291+
b := newBuilder(f, driver)
1292+
1293+
f.It("DeviceTaint keeps pod pending", func(ctx context.Context) {
1294+
pod, template := b.podInline()
1295+
b.create(ctx, pod, template)
1296+
framework.ExpectNoError(e2epod.WaitForPodNameUnschedulableInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name))
1297+
})
1298+
1299+
f.It("DeviceToleration enables pod scheduling", func(ctx context.Context) {
1300+
pod, template := b.podInline()
1301+
template.Spec.Spec.Devices.Requests[0].Tolerations = []resourceapi.DeviceToleration{{
1302+
Effect: resourceapi.DeviceTaintEffectNoSchedule,
1303+
Operator: resourceapi.DeviceTolerationOpExists,
1304+
// No key: tolerate *all* taints with this effect.
1305+
}}
1306+
b.create(ctx, pod, template)
1307+
b.testPod(ctx, f, pod)
1308+
})
1309+
1310+
f.It("DeviceTaintRule evicts pod", func(ctx context.Context) {
1311+
pod, template := b.podInline()
1312+
template.Spec.Spec.Devices.Requests[0].Tolerations = []resourceapi.DeviceToleration{{
1313+
Effect: resourceapi.DeviceTaintEffectNoSchedule,
1314+
Operator: resourceapi.DeviceTolerationOpExists,
1315+
// No key: tolerate *all* taints with this effect.
1316+
}}
1317+
// Add a finalizer to ensure that we get a chance to test the pod status after eviction (= deletion).
1318+
pod.Finalizers = []string{"e2e-test/dont-delete-me"}
1319+
b.create(ctx, pod, template)
1320+
b.testPod(ctx, f, pod)
1321+
ginkgo.DeferCleanup(func(ctx context.Context) {
1322+
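// Unblock shutdown by removing the finalizer.
// NOTE(review): this cleanup is registered only after b.testPod returns; if
// b.testPod aborts the test, the finalizer presumably stays on the pod and
// delays namespace deletion. Consider registering the cleanup before b.testPod.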
1323+
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, pod.Name, metav1.GetOptions{})
1324+
framework.ExpectNoError(err, "get pod")
1325+
pod.Finalizers = nil
1326+
_, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Update(ctx, pod, metav1.UpdateOptions{})
1327+
framework.ExpectNoError(err, "remove finalizers from pod")
1328+
})
1329+
1330+
// Now evict it.
1331+
ginkgo.By("Evicting pod...")
1332+
taint := &resourcealphaapi.DeviceTaintRule{
1333+
ObjectMeta: metav1.ObjectMeta{
1334+
GenerateName: "device-taint-rule-" + f.UniqueName + "-",
1335+
},
1336+
Spec: resourcealphaapi.DeviceTaintRuleSpec{
1337+
// All devices of the current driver instance.
1338+
DeviceSelector: &resourcealphaapi.DeviceTaintSelector{
1339+
Driver: &driver.Name,
1340+
},
1341+
Taint: resourcealphaapi.DeviceTaint{
1342+
Effect: resourcealphaapi.DeviceTaintEffectNoExecute,
1343+
Key: "test.example.com/evict",
1344+
Value: "now",
1345+
// No TimeAdded, gets defaulted.
1346+
},
1347+
},
1348+
}
1349+
createdTaint := b.create(ctx, taint)
1350+
taint = createdTaint[0].(*resourcealphaapi.DeviceTaintRule)
1351+
gomega.Expect(*taint).Should(gomega.HaveField("Spec.Taint.TimeAdded.Time", gomega.BeTemporally("~", time.Now(), time.Minute /* allow for some clock drift and delays */)))
1352+
1353+
framework.ExpectNoError(e2epod.WaitForPodTerminatingInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, f.Timeouts.PodStart))
1354+
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, pod.Name, metav1.GetOptions{})
1355+
framework.ExpectNoError(err, "get pod")
1356+
gomega.Expect(pod).Should(gomega.HaveField("Status.Conditions", gomega.ContainElement(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
1357+
// LastTransitionTime is unknown.
1358+
"Type": gomega.Equal(v1.DisruptionTarget),
1359+
"Status": gomega.Equal(v1.ConditionTrue),
1360+
"Reason": gomega.Equal("DeletionByDeviceTaintManager"),
1361+
"Message": gomega.Equal("Device Taint manager: deleting due to NoExecute taint"),
1362+
}))))
1363+
})
1364+
})
1365+
12831366
// TODO (https://github.com/kubernetes/kubernetes/issues/123699): move most of the test below into `testDriver` so that they get
12841367
// executed with different parameters.
12851368

@@ -2006,6 +2089,12 @@ func (b *builder) create(ctx context.Context, objs ...klog.KMetadata) []klog.KMe
20062089
err := b.f.ClientSet.ResourceV1beta1().ResourceSlices().Delete(ctx, createdObj.GetName(), metav1.DeleteOptions{})
20072090
framework.ExpectNoError(err, "delete node resource slice")
20082091
})
2092+
case *resourcealphaapi.DeviceTaintRule:
2093+
createdObj, err = b.f.ClientSet.ResourceV1alpha3().DeviceTaintRules().Create(ctx, obj, metav1.CreateOptions{})
2094+
ginkgo.DeferCleanup(func(ctx context.Context) {
2095+
err := b.f.ClientSet.ResourceV1alpha3().DeviceTaintRules().Delete(ctx, createdObj.GetName(), metav1.DeleteOptions{})
2096+
framework.ExpectNoError(err, "delete DeviceTaintRule")
2097+
})
20092098
case *appsv1.DaemonSet:
20102099
createdObj, err = b.f.ClientSet.AppsV1().DaemonSets(b.f.Namespace.Name).Create(ctx, obj, metav1.CreateOptions{})
20112100
// Cleanup not really needed, but speeds up namespace shutdown.

test/e2e/dra/kind.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ nodes:
2020
v: "5"
2121
apiServer:
2222
extraArgs:
23-
runtime-config: "resource.k8s.io/v1beta1=true"
23+
runtime-config: "resource.k8s.io/v1alpha3=true,resource.k8s.io/v1beta1=true"
2424
- |
2525
kind: InitConfiguration
2626
nodeRegistration:

test/e2e/feature/feature.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,16 @@ var (
112112
// is enabled such that passing CDI device IDs through CRI fields is supported
113113
DRAAdminAccess = framework.WithFeature(framework.ValidFeatures.Add("DRAAdminAccess"))
114114

115+
// owning-sig: sig-scheduling
116+
// kep: https://kep.k8s.io/5055
117+
// test-infra jobs:
118+
// - "ci-kind-dra-all" in https://testgrid.k8s.io/sig-node-dynamic-resource-allocation
119+
//
120+
// This label is used for tests which need:
121+
// - the DynamicResourceAllocation *and* DRADeviceTaints feature gates
122+
// - the resource.k8s.io API group, including version v1alpha3
123+
DRADeviceTaints = framework.WithFeature(framework.ValidFeatures.Add("DRADeviceTaints"))
124+
115125
// TODO: document the feature (owning SIG, when to use this feature for a test)
116126
// OWNER: sig-node
117127
// Testing downward API huge pages

0 commit comments

Comments
 (0)