Skip to content

Commit 196b871

Browse files
committed
DRA: added n - 3 job
This currently covers skew testing between 1.32 and 1.35.
1 parent 0117660 commit 196b871

File tree

4 files changed

+388
-0
lines changed

4 files changed

+388
-0
lines changed

config/jobs/kubernetes/sig-node/dra-canary.yaml

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,135 @@ presubmits:
517517
cpu: 2
518518
memory: 6Gi
519519

520+
- name: pull-kubernetes-kind-dra-n-3-canary
521+
cluster: eks-prow-build-cluster
522+
skip_branches:
523+
- release-\d+\.\d+ # per-release image
524+
always_run: false
525+
optional: true
526+
labels:
527+
preset-service-account: "true"
528+
preset-dind-enabled: "true"
529+
preset-kind-volume-mounts: "true"
530+
annotations:
531+
testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-presubmits
532+
description: Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind with kubelet from the "current - 3" release.
533+
testgrid-alert-email: [email protected], [email protected]
534+
decorate: true
535+
decoration_config:
536+
timeout: 90m
537+
path_alias: k8s.io/kubernetes
538+
spec:
539+
containers:
540+
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250815-171060767f-master
541+
command:
542+
- runner.sh
543+
args:
544+
- /bin/bash
545+
- -xce
546+
- |
547+
set -o pipefail
548+
# A presubmit job uses the checked out and merged source code.
549+
revision=$(git describe --tags)
550+
kind_yaml_cmd=(cat test/e2e/dra/kind.yaml)
551+
kind_node_source=.
552+
features=( )
553+
make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test"
554+
ginkgo=_output/bin/ginkgo
555+
e2e_test=_output/bin/e2e.test
556+
# The latest kind is assumed to work also for older release branches, should this job get forked.
557+
curl --fail --silent --show-error --location https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind
558+
kind build node-image --image=dra/node:latest "${kind_node_source}"
559+
GINKGO_E2E_PID=
560+
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -TERM "${GINKGO_E2E_PID}"; fi' TERM
561+
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -INT "${GINKGO_E2E_PID}"; fi' INT
562+
# The final kind.yaml is the result of getting the original kind.yaml, manipulating it with sed,
563+
# and adding something at the end.
564+
(
565+
${kind_yaml_cmd[@]}
566+
567+
# Additional features are not in kind.yaml, but they can be added at the end.
568+
for feature in ${features[@]}; do echo " ${feature}: true"; done
569+
570+
# Append ClusterConfiguration which causes etcd to use /tmp
571+
# (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
572+
# There's no kubeadmConfigPatches in any kind.yaml, so we can append at the end.
573+
cat <<EOF
574+
kubeadmConfigPatches:
575+
- |
576+
kind: ClusterConfiguration
577+
etcd:
578+
local:
579+
dataDir: /tmp/etcd
580+
EOF
581+
) >/tmp/kind.yaml
582+
cat /tmp/kind.yaml
583+
kind create cluster --retain --config /tmp/kind.yaml --image dra/node:latest
584+
atexit () {
585+
kind export logs "${ARTIFACTS}/kind"
586+
kind delete cluster
587+
}
588+
trap atexit EXIT
589+
590+
# Replace the kubelet binary and restart it, as in https://gist.github.com/aojea/2c94034f8e86d08842e5916231eb3fe1
591+
# and https://github.com/kubernetes/test-infra/blob/9cccc25265537e8dfa556688cf10754622014424/experiment/compatibility-versions/emulated-version-upgrade.sh#L56-L66.
592+
major=$(echo "$revision" | sed -e 's/^v\([0-9]*\).*/\1/')
593+
minor=$(echo "$revision" | sed -e 's/^v[0-9]*\.\([0-9]*\).*/\1/')
594+
previous_minor=$((minor - 3))
595+
# Test with the stable release to avoid breaking presubmits because of unrelated issues in a release candidate.
596+
# Ask curl to append the HTTP status code after the response body (-w ' %{http_code}').
597+
# Then parse the output using Bash parameter expansion:
598+
# ${response% *} → everything before the last space (the body)
599+
# ${response##* } → everything after the last space (the HTTP code)
600+
response=$(curl --silent -w ' %{http_code}' -L "https://dl.k8s.io/release/stable-$major.$previous_minor.txt" )
601+
previous="${response% *}"
602+
status="${response##* }"
603+
if [ "$status" == 404 ] ; then
604+
# if stable doesn't exist - use latest
605+
response=$(curl --silent -w ' %{http_code}' -L "https://dl.k8s.io/release/latest-$major.$previous_minor.txt" )
606+
previous="${response% *}"
607+
status="${response##* }"
608+
fi
609+
if [ "$status" -ne 200 ] ; then
610+
echo "error: unable to get release $major.$previous_minor info, HTTP status: $status, response: $previous"
611+
exit 1
612+
fi
613+
curl --silent -L "https://dl.k8s.io/release/$previous/kubernetes-server-linux-amd64.tar.gz" | tar zxOf - kubernetes/server/bin/kubelet >/tmp/kubelet
614+
chmod a+rx /tmp/kubelet
615+
/tmp/kubelet --version
616+
worker_nodes=$(kind get nodes | grep worker)
617+
for n in $worker_nodes; do
618+
docker cp /tmp/kubelet $n:/usr/bin/kubelet
619+
docker exec $n systemctl restart kubelet
620+
done
621+
622+
# We need support for disabling tests which need a recent kubelet.
623+
# If a test is labeled with `KubeletMinVersion:1.34`, then it cannot run
624+
# when the deployed kubelet is 1.32. This is enforced by
625+
# generating `! KubeletMinVersion: containsAny { 1.33, 1.34, 1.35 }`, i.e.
626+
# including all unsupported kubelet versions in a deny list.
627+
kubelet_label_filter=" && ! KubeletMinVersion: containsAny { $( for v in $(seq $((previous_minor + 1)) $((minor - 1))); do echo "1.$v, "; done)1.$minor }"
628+
629+
# Running tests which only cover control plane behavior is not useful
630+
# in a kubelet version skew job. We can filter them out by including
631+
# only tests which have the DynamicResourceAllocation feature because
632+
# only those cover kubelet behavior.
633+
kubelet_label_filter+=" && Feature: containsAny DynamicResourceAllocation"
634+
635+
KUBECONFIG=${HOME}/.kube/config ${ginkgo} run --nodes=8 --timeout=24h --silence-skips --force-newlines --no-color --label-filter="DRA && Feature: isSubsetOf { OffByDefault, DynamicResourceAllocation } && !FeatureGate:ResourceHealthStatus$kubelet_label_filter && !Alpha && !Flaky && !Slow" ${e2e_test} -- -provider=local -report-dir="${ARTIFACTS}" -report-complete-ginkgo -report-complete-junit &
636+
GINKGO_E2E_PID=$!
637+
wait "${GINKGO_E2E_PID}"
638+
# docker-in-docker needs privileged mode
639+
securityContext:
640+
privileged: true
641+
resources:
642+
limits:
643+
cpu: 2
644+
memory: 6Gi
645+
requests:
646+
cpu: 2
647+
memory: 6Gi
648+
520649
- name: pull-kubernetes-dra-integration-canary
521650
cluster: eks-prow-build-cluster
522651
skip_branches:

config/jobs/kubernetes/sig-node/dra-ci.yaml

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,123 @@ periodics:
409409
cpu: 2
410410
memory: 6Gi
411411

412+
- name: ci-kind-dra-n-3
413+
cluster: eks-prow-build-cluster
414+
interval: 6h
415+
labels:
416+
preset-service-account: "true"
417+
preset-dind-enabled: "true"
418+
preset-kind-volume-mounts: "true"
419+
annotations:
420+
testgrid-dashboards: sig-node-dynamic-resource-allocation
421+
description: Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind with kubelet from the "current - 3" release.
422+
testgrid-alert-email: [email protected], [email protected]
423+
fork-per-release: "true"
424+
fork-per-release-periodic-interval: 24h
425+
fork-per-release-replacements: latest-fast.txt -> latest-{{.Version}}.txt, https://dl.k8s.io/ci/fast -> https://dl.k8s.io/ci
426+
decorate: true
427+
decoration_config:
428+
timeout: 90m
429+
spec:
430+
containers:
431+
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250815-171060767f-master
432+
command:
433+
- runner.sh
434+
args:
435+
- /bin/bash
436+
- -xce
437+
- |
438+
set -o pipefail
439+
# A CI job uses pre-built release artifacts and pulls necessary source files from GitHub.
440+
revision=$(curl --fail --silent --show-error --location https://dl.k8s.io/ci/fast/latest-fast.txt)
441+
# Report what was tested.
442+
echo "{\"revision\":\"$revision\"}" >"${ARTIFACTS}/metadata.json"
443+
# git hash from e.g. v1.33.0-alpha.1.161+e62ce1c9db2dad
444+
hash=${revision/*+/}
445+
kind_yaml_cmd=(curl --fail --silent --show-error --location "https://raw.githubusercontent.com/kubernetes/kubernetes/$hash/test/e2e/dra/kind.yaml")
446+
kind_node_source="https://dl.k8s.io/ci/fast/$revision/kubernetes-server-linux-amd64.tar.gz"
447+
features=( )
448+
curl --fail --silent --show-error --location "https://dl.k8s.io/ci/fast/$revision/kubernetes-test-linux-amd64.tar.gz" | tar zxvf -
449+
ginkgo=kubernetes/test/bin/ginkgo
450+
e2e_test=kubernetes/test/bin/e2e.test
451+
# The latest kind is assumed to work also for older release branches, should this job get forked.
452+
curl --fail --silent --show-error --location https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind
453+
kind build node-image --image=dra/node:latest "${kind_node_source}"
454+
GINKGO_E2E_PID=
455+
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -TERM "${GINKGO_E2E_PID}"; fi' TERM
456+
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -INT "${GINKGO_E2E_PID}"; fi' INT
457+
# The final kind.yaml is the result of getting the original kind.yaml, manipulating it with sed,
458+
# and adding something at the end.
459+
(
460+
${kind_yaml_cmd[@]}
461+
462+
# Additional features are not in kind.yaml, but they can be added at the end.
463+
for feature in ${features[@]}; do echo " ${feature}: true"; done
464+
465+
# Append ClusterConfiguration which causes etcd to use /tmp
466+
# (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
467+
# There's no kubeadmConfigPatches in any kind.yaml, so we can append at the end.
468+
cat <<EOF
469+
kubeadmConfigPatches:
470+
- |
471+
kind: ClusterConfiguration
472+
etcd:
473+
local:
474+
dataDir: /tmp/etcd
475+
EOF
476+
) >/tmp/kind.yaml
477+
cat /tmp/kind.yaml
478+
kind create cluster --retain --config /tmp/kind.yaml --image dra/node:latest
479+
atexit () {
480+
kind export logs "${ARTIFACTS}/kind"
481+
kind delete cluster
482+
}
483+
trap atexit EXIT
484+
485+
# Replace the kubelet binary and restart it, as in https://gist.github.com/aojea/2c94034f8e86d08842e5916231eb3fe1
486+
# and https://github.com/kubernetes/test-infra/blob/9cccc25265537e8dfa556688cf10754622014424/experiment/compatibility-versions/emulated-version-upgrade.sh#L56-L66.
487+
major=$(echo "$revision" | sed -e 's/^v\([0-9]*\).*/\1/')
488+
minor=$(echo "$revision" | sed -e 's/^v[0-9]*\.\([0-9]*\).*/\1/')
489+
previous_minor=$((minor - 3))
490+
# Test with the most recent CI build, doesn't even need to be released yet.
491+
# We want to know if those are broken.
492+
previous=$(curl --silent -L "https://dl.k8s.io/ci/latest-$major.$previous_minor.txt" )
493+
curl --silent -L "https://dl.k8s.io/ci/$previous/kubernetes-server-linux-amd64.tar.gz" | tar zxOf - kubernetes/server/bin/kubelet >/tmp/kubelet
494+
chmod a+rx /tmp/kubelet
495+
/tmp/kubelet --version
496+
worker_nodes=$(kind get nodes | grep worker)
497+
for n in $worker_nodes; do
498+
docker cp /tmp/kubelet $n:/usr/bin/kubelet
499+
docker exec $n systemctl restart kubelet
500+
done
501+
502+
# We need support for disabling tests which need a recent kubelet.
503+
# If a test is labeled with `KubeletMinVersion:1.34`, then it cannot run
504+
# when the deployed kubelet is 1.32. This is enforced by
505+
# generating `! KubeletMinVersion: containsAny { 1.33, 1.34, 1.35 }`, i.e.
506+
# including all unsupported kubelet versions in a deny list.
507+
kubelet_label_filter=" && ! KubeletMinVersion: containsAny { $( for v in $(seq $((previous_minor + 1)) $((minor - 1))); do echo "1.$v, "; done)1.$minor }"
508+
509+
# Running tests which only cover control plane behavior is not useful
510+
# in a kubelet version skew job. We can filter them out by including
511+
# only tests which have the DynamicResourceAllocation feature because
512+
# only those cover kubelet behavior.
513+
kubelet_label_filter+=" && Feature: containsAny DynamicResourceAllocation"
514+
515+
KUBECONFIG=${HOME}/.kube/config ${ginkgo} run --nodes=8 --timeout=24h --silence-skips --force-newlines --no-color --label-filter="DRA && Feature: isSubsetOf { OffByDefault, DynamicResourceAllocation } && !FeatureGate:ResourceHealthStatus$kubelet_label_filter && !Alpha && !Flaky" ${e2e_test} -- -provider=local -report-dir="${ARTIFACTS}" -report-complete-ginkgo -report-complete-junit &
516+
GINKGO_E2E_PID=$!
517+
wait "${GINKGO_E2E_PID}"
518+
# docker-in-docker needs privileged mode
519+
securityContext:
520+
privileged: true
521+
resources:
522+
limits:
523+
cpu: 2
524+
memory: 6Gi
525+
requests:
526+
cpu: 2
527+
memory: 6Gi
528+
412529
- name: ci-dra-integration
413530
cluster: eks-prow-build-cluster
414531
interval: 6h

0 commit comments

Comments
 (0)