Skip to content

Commit e8fcda1

Browse files
committed
DRA canary: experiment with kubelet version skew
The intent is to run the current set of tests against a cluster where the kubelet is from a previous release. Which tests will pass remains to be seen... While at it, avoid repeatedly dumping intermediate kind_yaml variable assignment into the job's log output. Instead, manipulate the content once and dump the final result.
1 parent f5f7403 commit e8fcda1

File tree

3 files changed

+242
-22
lines changed

3 files changed

+242
-22
lines changed

config/jobs/kubernetes/sig-node/dra-canary.yaml

Lines changed: 147 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,27 +32,44 @@ presubmits:
3232
- |
3333
set -o pipefail
3434
# A presubmit job uses the checked out and merged source code.
35-
kind_yaml=$(cat test/e2e/dra/kind.yaml)
35+
revision=$(git describe --tags)
36+
kind_yaml_cmd=(cat test/e2e/dra/kind.yaml)
3637
kind_node_source=.
3738
features=( )
3839
make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test"
3940
ginkgo=_output/bin/ginkgo
4041
e2e_test=_output/bin/e2e.test
4142
# The latest kind is assumed to work also for older release branches, should this job get forked.
4243
curl --fail --silent --show-error --location https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind
43-
kind build node-image --image=dra/node:latest "${kind_node_source}"
44+
control_plane_image=dra/node:latest
45+
kind build node-image --image="$control_plane_image" "${kind_node_source}"
46+
worker_image="$control_plane_image"
4447
GINKGO_E2E_PID=
4548
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -TERM "${GINKGO_E2E_PID}"; fi' TERM
4649
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -INT "${GINKGO_E2E_PID}"; fi' INT
47-
# Inject ClusterConfiguration which causes etcd to use /tmp
48-
# (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
49-
if ! echo "$kind_yaml" | grep -q '^kubeadmConfigPatches:'; then
50-
# Add kubeadmConfigPatches list before node list, there is none at the moment.
51-
kind_yaml=$(echo "$kind_yaml" | sed -e '/nodes:/ i\kubeadmConfigPatches:')
52-
fi
53-
kind_yaml=$(echo "$kind_yaml" | sed -e '/^kubeadmConfigPatches:/ a\- |\n kind: ClusterConfiguration\n etcd:\n local:\n dataDir: /tmp/etcd')
54-
# Additional features are not in kind.yaml, but they can be added at the end.
55-
kind create cluster --retain --config <(echo "${kind_yaml}"; for feature in ${features[@]}; do echo " ${feature}: true"; done) --image dra/node:latest
50+
# The final config gets dumped to stderr of the job.
51+
# It's the result of getting the original kind.yaml, manipulating it with sed,
52+
# and adding something at the end.
53+
kind create cluster --retain --config <( (
54+
${kind_yaml_cmd[@]} |
55+
# Configure potentially different images for control plane and workers.
56+
sed -e "/^- role: control-plane/ a \ image: $control_plane_image" -e "/^- role: worker/ a \ image: $worker_image"
57+
58+
# Additional features are not in kind.yaml, but they can be added at the end.
59+
for feature in ${features[@]}; do echo " ${feature}: true"; done
60+
61+
# Append ClusterConfiguration which causes etcd to use /tmp
62+
# (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
63+
# There's no kubeadmConfigPatches in any kind.yaml, so we can append at the end.
64+
cat <<EOF
65+
kubeadmConfigPatches:
66+
- |
67+
kind: ClusterConfiguration
68+
etcd:
69+
local:
70+
dataDir: /tmp/etcd
71+
EOF
72+
) | tee /dev/stderr )
5673
atexit () {
5774
kind export logs "${ARTIFACTS}/kind"
5875
kind delete cluster
@@ -101,7 +118,8 @@ presubmits:
101118
- |
102119
set -o pipefail
103120
# A presubmit job uses the checked out and merged source code.
104-
kind_yaml=$(cat test/e2e/dra/kind.yaml)
121+
revision=$(git describe --tags)
122+
kind_yaml_cmd=(cat test/e2e/dra/kind.yaml)
105123
kind_node_source=.
106124
# Which DRA features exist can change over time.
107125
features=( $( grep '"DRA' pkg/features/kube_features.go | sed 's/.*"\(.*\)"/\1/' ) )
@@ -111,19 +129,35 @@ presubmits:
111129
e2e_test=_output/bin/e2e.test
112130
# The latest kind is assumed to work also for older release branches, should this job get forked.
113131
curl --fail --silent --show-error --location https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind
114-
kind build node-image --image=dra/node:latest "${kind_node_source}"
132+
control_plane_image=dra/node:latest
133+
kind build node-image --image="$control_plane_image" "${kind_node_source}"
134+
worker_image="$control_plane_image"
115135
GINKGO_E2E_PID=
116136
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -TERM "${GINKGO_E2E_PID}"; fi' TERM
117137
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -INT "${GINKGO_E2E_PID}"; fi' INT
118-
# Inject ClusterConfiguration which causes etcd to use /tmp
119-
# (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
120-
if ! echo "$kind_yaml" | grep -q '^kubeadmConfigPatches:'; then
121-
# Add kubeadmConfigPatches list before node list, there is none at the moment.
122-
kind_yaml=$(echo "$kind_yaml" | sed -e '/nodes:/ i\kubeadmConfigPatches:')
123-
fi
124-
kind_yaml=$(echo "$kind_yaml" | sed -e '/^kubeadmConfigPatches:/ a\- |\n kind: ClusterConfiguration\n etcd:\n local:\n dataDir: /tmp/etcd')
125-
# Additional features are not in kind.yaml, but they can be added at the end.
126-
kind create cluster --retain --config <(echo "${kind_yaml}"; for feature in ${features[@]}; do echo " ${feature}: true"; done) --image dra/node:latest
138+
# The final config gets dumped to stderr of the job.
139+
# It's the result of getting the original kind.yaml, manipulating it with sed,
140+
# and adding something at the end.
141+
kind create cluster --retain --config <( (
142+
${kind_yaml_cmd[@]} |
143+
# Configure potentially different images for control plane and workers.
144+
sed -e "/^- role: control-plane/ a \ image: $control_plane_image" -e "/^- role: worker/ a \ image: $worker_image"
145+
146+
# Additional features are not in kind.yaml, but they can be added at the end.
147+
for feature in ${features[@]}; do echo " ${feature}: true"; done
148+
149+
# Append ClusterConfiguration which causes etcd to use /tmp
150+
# (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
151+
# There's no kubeadmConfigPatches in any kind.yaml, so we can append at the end.
152+
cat <<EOF
153+
kubeadmConfigPatches:
154+
- |
155+
kind: ClusterConfiguration
156+
etcd:
157+
local:
158+
dataDir: /tmp/etcd
159+
EOF
160+
) | tee /dev/stderr )
127161
atexit () {
128162
kind export logs "${ARTIFACTS}/kind"
129163
kind delete cluster
@@ -143,6 +177,97 @@ presubmits:
143177
cpu: 2
144178
memory: 6Gi
145179

180+
- name: pull-kubernetes-kind-dra-n-1-canary
181+
cluster: eks-prow-build-cluster
182+
skip_branches:
183+
- release-\d+\.\d+ # per-release image
184+
always_run: false
185+
optional: true
186+
labels:
187+
preset-service-account: "true"
188+
preset-dind-enabled: "true"
189+
preset-kind-volume-mounts: "true"
190+
annotations:
191+
testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-presubmits
192+
description: Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind with kubelet from the previous release.
193+
testgrid-alert-email: [email protected], [email protected]
194+
decorate: true
195+
decoration_config:
196+
timeout: 90m
197+
path_alias: k8s.io/kubernetes
198+
spec:
199+
containers:
200+
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250527-1b2b10e804-master
201+
command:
202+
- runner.sh
203+
args:
204+
- /bin/bash
205+
- -xce
206+
- |
207+
set -o pipefail
208+
# A presubmit job uses the checked out and merged source code.
209+
revision=$(git describe --tags)
210+
kind_yaml_cmd=(cat test/e2e/dra/kind.yaml)
211+
kind_node_source=.
212+
features=( )
213+
make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test"
214+
ginkgo=_output/bin/ginkgo
215+
e2e_test=_output/bin/e2e.test
216+
# The latest kind is assumed to work also for older release branches, should this job get forked.
217+
curl --fail --silent --show-error --location https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind
218+
control_plane_image=dra/node:latest
219+
kind build node-image --image="$control_plane_image" "${kind_node_source}"
220+
major=$(echo "$revision" | sed -e 's/^v\([0-9]*\).*/\1/')
221+
# Minor version from e.g. v1.33.0-alpha.1.161+e62ce1c9db2dad -> 33. The dot after the major version must be matched explicitly, otherwise the capture group is empty.
minor=$(echo "$revision" | sed -e 's/^v[0-9]*\.\([0-9]*\).*/\1/')
222+
# TODO: find latest patch release
223+
worker_image=dra/node:skewed1
224+
kind build node-image --image="$worker_image" "https://dl.k8s.io/v$major.$((minor - 1)).0/kubernetes-server-linux-amd64.tar.gz"
225+
# We might need support for disabling tests which need a recent kubelet. We'll see...
226+
GINKGO_E2E_PID=
227+
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -TERM "${GINKGO_E2E_PID}"; fi' TERM
228+
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -INT "${GINKGO_E2E_PID}"; fi' INT
229+
# The final config gets dumped to stderr of the job.
230+
# It's the result of getting the original kind.yaml, manipulating it with sed,
231+
# and adding something at the end.
232+
kind create cluster --retain --config <( (
233+
${kind_yaml_cmd[@]} |
234+
# Configure potentially different images for control plane and workers.
235+
sed -e "/^- role: control-plane/ a \ image: $control_plane_image" -e "/^- role: worker/ a \ image: $worker_image"
236+
237+
# Additional features are not in kind.yaml, but they can be added at the end.
238+
for feature in ${features[@]}; do echo " ${feature}: true"; done
239+
240+
# Append ClusterConfiguration which causes etcd to use /tmp
241+
# (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
242+
# There's no kubeadmConfigPatches in any kind.yaml, so we can append at the end.
243+
cat <<EOF
244+
kubeadmConfigPatches:
245+
- |
246+
kind: ClusterConfiguration
247+
etcd:
248+
local:
249+
dataDir: /tmp/etcd
250+
EOF
251+
) | tee /dev/stderr )
252+
atexit () {
253+
kind export logs "${ARTIFACTS}/kind"
254+
kind delete cluster
255+
}
256+
trap atexit EXIT
257+
KUBECONFIG=${HOME}/.kube/config ${ginkgo} run --nodes=8 --timeout=24h --silence-skips --force-newlines --no-color --label-filter="DRA && Feature: isSubsetOf { OffByDefault, DynamicResourceAllocation } && !Alpha && !Flaky && !Slow" ${e2e_test} -- -provider=local -report-dir="${ARTIFACTS}" -report-complete-ginkgo -report-complete-junit &
258+
GINKGO_E2E_PID=$!
259+
wait "${GINKGO_E2E_PID}"
260+
# docker-in-docker needs privileged mode
261+
securityContext:
262+
privileged: true
263+
resources:
264+
limits:
265+
cpu: 2
266+
memory: 6Gi
267+
requests:
268+
cpu: 2
269+
memory: 6Gi
270+
146271
- name: pull-kubernetes-node-e2e-crio-cgrpv1-dra-canary
147272
cluster: k8s-infra-prow-build
148273
skip_branches:

config/jobs/kubernetes/sig-node/dra.generate.conf

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ timeout = 90m
1919
# Must be sufficiently smaller than the overall job timeout to leave time
2020
# for test setup (compilation, deploying VM) and collecting results.
2121
e2e_node_timeout = 60m
22+
# Values > 0 enable version skew testing with an older kubelet version.
23+
kubelet_skew = 0
2224

2325
# This job runs e2e.test with a focus on tests for the Dynamic Resource Allocation feature (currently beta)
2426
# on a kind cluster with containerd updated to a version with CDI support.
@@ -39,6 +41,17 @@ all_features = true
3941
use_dind = true
4042
run_if_changed = /(dra|dynamicresources|resourceclaim|deviceclass|resourceslice|resourceclaimtemplate|dynamic-resource-allocation|pkg/apis/resource|api/resource)/.*.go
4143

44+
# This job runs the current e2e.test against a cluster where the kubelet is from the previous release (n - 1).
45+
#
46+
# It enables and tests the same features as kind-dra.
47+
[kind-dra-n-1]
48+
description = Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind with kubelet from the previous release.
49+
use_dind = true
50+
cluster = eks-prow-build-cluster
51+
run_if_changed = /(dra|dynamicresources|resourceclaim|deviceclass|resourceslice|resourceclaimtemplate|dynamic-resource-allocation|pkg/apis/resource|api/resource)/.*.go
52+
kubelet_skew = 1
53+
generate = canary # not ready for periodic yet
54+
4255
# This job runs e2e_node.test with a focus on tests for the Dynamic Resource Allocation feature (currently beta)
4356
[node-e2e-crio-cgrpv1-dra]
4457
job_type = node

config/jobs/kubernetes/sig-node/dra.jinja

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,87 @@ presubmits:
117117
- -xce
118118
- |
119119
set -o pipefail
120+
{%- if canary %}
121+
{%- if ci %}
122+
# A CI job uses pre-built release artifacts and pulls necessary source files from GitHub.
123+
revision=$(curl --fail --silent --show-error --location ${CI_URL}/${LATEST_TXT})
124+
# Report what was tested.
125+
echo "{\"revision\":\"$revision\"}" >"${ARTIFACTS}/metadata.json"
126+
# git hash from e.g. v1.33.0-alpha.1.161+e62ce1c9db2dad
127+
hash=${revision/*+/}
128+
kind_yaml_cmd=(curl --fail --silent --show-error --location "https://raw.githubusercontent.com/kubernetes/kubernetes/$hash/test/e2e/dra/kind.yaml")
129+
kind_node_source="${CI_URL}/$revision/kubernetes-server-linux-amd64.tar.gz"
130+
{%- else %}
131+
# A presubmit job uses the checked out and merged source code.
132+
revision=$(git describe --tags)
133+
kind_yaml_cmd=(cat test/e2e/dra/kind.yaml)
134+
kind_node_source=.
135+
{%- endif %}
136+
{%- if all_features %}
137+
# Which DRA features exist can change over time.
138+
features=( $( {%- if ci %} curl --fail --silent --show-error --location "https://raw.githubusercontent.com/kubernetes/kubernetes/$hash/pkg/features/kube_features.go" | grep '"DRA' {% else %} grep '"DRA' pkg/features/kube_features.go {%- endif %} | sed 's/.*"\(.*\)"/\1/' ) )
139+
: "Enabling DRA feature(s): ${features[*]}."
140+
{%- else %}
141+
features=( )
142+
{%- endif %}
143+
{%- if ci %}
144+
curl --fail --silent --show-error --location "${CI_URL}/$revision/kubernetes-test-linux-amd64.tar.gz" | tar zxvf -
145+
ginkgo=kubernetes/test/bin/ginkgo
146+
e2e_test=kubernetes/test/bin/e2e.test
147+
{%- else %}
148+
make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test"
149+
ginkgo=_output/bin/ginkgo
150+
e2e_test=_output/bin/e2e.test
151+
{%- endif %}
152+
# The latest kind is assumed to work also for older release branches, should this job get forked.
153+
curl --fail --silent --show-error --location https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind
154+
control_plane_image=dra/node:latest
155+
kind build node-image --image="$control_plane_image" "${kind_node_source}"
156+
{%- if kubelet_skew|int > 0 %}
157+
major=$(echo "$revision" | sed -e 's/^v\([0-9]*\).*/\1/')
158+
# Minor version from e.g. v1.33.0-alpha.1.161+e62ce1c9db2dad -> 33. The dot after the major version must be matched explicitly, otherwise the capture group is empty.
minor=$(echo "$revision" | sed -e 's/^v[0-9]*\.\([0-9]*\).*/\1/')
159+
# TODO: find latest patch release
160+
worker_image=dra/node:skewed{{kubelet_skew}}
161+
kind build node-image --image="$worker_image" "https://dl.k8s.io/v$major.$((minor - {{kubelet_skew|int}})).0/kubernetes-server-linux-amd64.tar.gz"
162+
# We might need support for disabling tests which need a recent kubelet. We'll see...
163+
{%- else %}
164+
worker_image="$control_plane_image"
165+
{%- endif %}
166+
GINKGO_E2E_PID=
167+
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -TERM "${GINKGO_E2E_PID}"; fi' TERM
168+
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -INT "${GINKGO_E2E_PID}"; fi' INT
169+
# The final config gets dumped to stderr of the job.
170+
# It's the result of getting the original kind.yaml, manipulating it with sed,
171+
# and adding something at the end.
172+
kind create cluster --retain --config <( (
173+
${kind_yaml_cmd[@]} |
174+
# Configure potentially different images for control plane and workers.
175+
sed -e "/^- role: control-plane/ a \ image: $control_plane_image" -e "/^- role: worker/ a \ image: $worker_image"
176+
177+
# Additional features are not in kind.yaml, but they can be added at the end.
178+
for feature in ${features[@]}; do echo " ${feature}: true"; done
179+
180+
# Append ClusterConfiguration which causes etcd to use /tmp
181+
# (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
182+
# There's no kubeadmConfigPatches in any kind.yaml, so we can append at the end.
183+
cat <<EOF
184+
kubeadmConfigPatches:
185+
- |
186+
kind: ClusterConfiguration
187+
etcd:
188+
local:
189+
dataDir: /tmp/etcd
190+
EOF
191+
) | tee /dev/stderr )
192+
atexit () {
193+
kind export logs "${ARTIFACTS}/kind"
194+
kind delete cluster
195+
}
196+
trap atexit EXIT
197+
KUBECONFIG=${HOME}/.kube/config ${ginkgo} run --nodes=8 --timeout=24h --silence-skips --force-newlines --no-color --label-filter="DRA && Feature: isSubsetOf { OffByDefault, DynamicResourceAllocation } {%- if not all_features %} && !Alpha {%- endif %} && !Flaky {%- if not ci %} && !Slow {%- endif %}" ${e2e_test} -- -provider=local -report-dir="${ARTIFACTS}" -report-complete-ginkgo -report-complete-junit &
198+
GINKGO_E2E_PID=$!
199+
wait "${GINKGO_E2E_PID}"
200+
{%- else -%}
120201
{%- if ci %}
121202
# A CI job uses pre-built release artifacts and pulls necessary source files from GitHub.
122203
revision=$(curl --fail --silent --show-error --location ${CI_URL}/${LATEST_TXT})
@@ -170,6 +251,7 @@ presubmits:
170251
KUBECONFIG=${HOME}/.kube/config ${ginkgo} run --nodes=8 --timeout=24h --silence-skips --force-newlines --no-color --label-filter="DRA && Feature: isSubsetOf { OffByDefault, DynamicResourceAllocation } {%- if not all_features %} && !Alpha {%- endif %} && !Flaky {%- if not ci %} && !Slow {%- endif %}" ${e2e_test} -- -provider=local -report-dir="${ARTIFACTS}" -report-complete-ginkgo -report-complete-junit &
171252
GINKGO_E2E_PID=$!
172253
wait "${GINKGO_E2E_PID}"
254+
{%- endif -%}
173255
{%- if ci %}
174256
env:
175257
- name: LATEST_TXT

0 commit comments

Comments
 (0)