Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions config/jobs/kubernetes/sig-node/dra-canary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,135 @@ presubmits:
cpu: 2
memory: 6Gi

# Presubmit canary: runs the DRA E2E tests against a kind cluster built from
# the checked out source, with the worker kubelets replaced by a kubelet from
# three minor releases back (see the description annotation and the script).
- name: pull-kubernetes-kind-dra-n-3-canary
cluster: eks-prow-build-cluster
skip_branches:
- release-\d+\.\d+ # per-release image
# NOTE(review): always_run: false together with optional: true presumably makes
# this an on-demand, non-blocking job - confirm against the Prow job documentation.
always_run: false
optional: true
labels:
preset-service-account: "true"
preset-dind-enabled: "true"
preset-kind-volume-mounts: "true"
annotations:
testgrid-dashboards: sig-node-dynamic-resource-allocation, sig-node-presubmits
description: Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind with kubelet from the "current - 3" release.
testgrid-alert-email: [email protected], [email protected]
decorate: true
decoration_config:
timeout: 90m
path_alias: k8s.io/kubernetes
spec:
containers:
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250815-171060767f-master
command:
- runner.sh
args:
- /bin/bash
# bash options: -x traces each command, -c reads the script from the next
# argument, -e aborts at the first failing command.
- -xce
- |
# Let a pipeline fail when any of its stages fails, not only the last one.
set -o pipefail
# A presubmit job uses the checked out and merged source code.
# The revision is parsed further down for its major and minor version.
revision=$(git describe --tags)
# Command (kept as an array) which prints the base kind configuration.
kind_yaml_cmd=(cat test/e2e/dra/kind.yaml)
# Source passed to "kind build node-image": the current directory.
kind_node_source=.
# Additional features which get appended to the kind configuration; none at the moment.
features=( )
# Build ginkgo and the E2E test binary; the two variables below point at the results.
make WHAT="github.com/onsi/ginkgo/v2/ginkgo k8s.io/kubernetes/test/e2e/e2e.test"
ginkgo=_output/bin/ginkgo
e2e_test=_output/bin/e2e.test
# The latest kind is assumed to work also for older release branches, should this job get forked.
# ${PATH%%:*} is the first directory listed in PATH; the kind binary is unpacked there.
curl --fail --silent --show-error --location https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind
# Build the node image which the cluster gets created from.
kind build node-image --image=dra/node:latest "${kind_node_source}"
GINKGO_E2E_PID=
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -TERM "${GINKGO_E2E_PID}"; fi' TERM
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -INT "${GINKGO_E2E_PID}"; fi' INT
# The final kind.yaml is assembled from three parts: the original kind.yaml,
# any additional features, and a kubeadm patch appended at the end.
(
  # Print the original kind.yaml. The array is quoted so that each element is
  # passed as exactly one word, without word splitting or globbing.
  "${kind_yaml_cmd[@]}"

  # Additional features are not in kind.yaml, but they can be added at the end.
  # NOTE(review): the leading whitespace of the echoed line has to match the
  # indentation of the feature list at the end of kind.yaml - verify.
  for feature in "${features[@]}"; do echo " ${feature}: true"; done

  # Append ClusterConfiguration which causes etcd to use /tmp
  # (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
  # There's no kubeadmConfigPatches in any kind.yaml, so we can append at the end.
  # The text below "- |" must be indented: it is the content of a YAML block scalar.
  cat <<EOF
kubeadmConfigPatches:
- |
  kind: ClusterConfiguration
  etcd:
    local:
      dataDir: /tmp/etcd
EOF
) >/tmp/kind.yaml
# Show the generated configuration in the job log.
cat /tmp/kind.yaml
# Exit handler: stores the kind logs as job artifacts, then removes the cluster.
atexit () {
  # Keep going when the export fails so that the cluster still gets deleted.
  kind export logs "${ARTIFACTS}/kind" || echo "warning: exporting the kind logs failed" >&2
  kind delete cluster
}
# The handler is installed before the cluster gets created: --retain keeps the
# nodes around when creation fails, and only the handler exports their logs
# and removes them again.
trap atexit EXIT
kind create cluster --retain --config /tmp/kind.yaml --image dra/node:latest

# Replace the kubelet binary and restart it, as in https://gist.github.com/aojea/2c94034f8e86d08842e5916231eb3fe1
# and https://github.com/kubernetes/test-infra/blob/9cccc25265537e8dfa556688cf10754622014424/experiment/compatibility-versions/emulated-version-upgrade.sh#L56-L66.
#
# The revision has to start with "v<major>.<minor>". Anything else is rejected
# here instead of ending up in the arithmetic and the download URLs below.
version_re='^v([0-9]+)\.([0-9]+)'
if [[ ! "${revision}" =~ ${version_re} ]]; then
  echo "error: unable to parse major.minor from revision '${revision}'" >&2
  exit 1
fi
major=${BASH_REMATCH[1]}
minor=${BASH_REMATCH[2]}
# The kubelet comes from the release which is three minor versions older.
previous_minor=$((minor - 3))
# Test with the stable release to avoid breaking presubmits because of unrelated issues in a release candidate.
# Ask curl to append the HTTP status code after the response body (-w ' %{http_code}').
# Then parse the output using Bash parameter expansion:
# ${response% *} → everything before the last space (the body)
# ${response##* } → everything after the last space (the HTTP code)
# --fail is intentionally not used for these two requests because the status code gets checked here.
response=$(curl --silent -w ' %{http_code}' -L "https://dl.k8s.io/release/stable-$major.$previous_minor.txt")
previous="${response% *}"
status="${response##* }"
if [[ "${status}" == 404 ]]; then
  # if stable doesn't exist - use latest
  response=$(curl --silent -w ' %{http_code}' -L "https://dl.k8s.io/release/latest-$major.$previous_minor.txt")
  previous="${response% *}"
  status="${response##* }"
fi
# String comparison: also rejects an empty or non-numeric status.
if [[ "${status}" != 200 ]]; then
  echo "error: unable to get release $major.$previous_minor info, HTTP status: $status, response: $previous" >&2
  exit 1
fi
# --fail turns an HTTP error into a curl failure instead of feeding the error page to tar.
curl --fail --silent --show-error --location "https://dl.k8s.io/release/$previous/kubernetes-server-linux-amd64.tar.gz" | tar zxOf - kubernetes/server/bin/kubelet >/tmp/kubelet
chmod a+rx /tmp/kubelet
# Log the version of the downloaded kubelet; also proves that the binary runs.
/tmp/kubelet --version
# Copy the downloaded kubelet into every worker node and restart the kubelet there.
# Without any worker node nothing would be tested, so that case is reported explicitly.
if ! worker_nodes=$(kind get nodes | grep worker); then
  echo "error: no worker nodes found in the kind cluster (or 'kind get nodes' failed)" >&2
  exit 1
fi
# One node name per line; an array avoids word splitting and globbing of the names.
mapfile -t worker_node_list <<<"${worker_nodes}"
for n in "${worker_node_list[@]}"; do
  docker cp /tmp/kubelet "${n}:/usr/bin/kubelet"
  docker exec "${n}" systemctl restart kubelet
done

# We need support for disabling tests which need a recent kubelet.
# If a test is labeled with `KubeletMinVersion:1.34`, then it cannot run
# when the deployed kubelet is 1.32. This is enforced by
# generating `! KubeletMinVersion: containsAny { 1.33, 1.34 }`, i.e.
# including all unsupported kubelet versions in a deny list.

#######################################
# Prints the kubelet versions which are newer than the deployed kubelet.
# Arguments: $1 - major version
#            $2 - minor version of the deployed kubelet
#            $3 - minor version of the code under test
# Outputs:   "<major>.<$2 + 1>, ..., <major>.<$3>" on stdout, on a single
#            line and without a trailing newline
#######################################
kubelet_min_version_denylist () {
  local major=$1 deployed_minor=$2 current_minor=$3
  local v list=
  for ((v = deployed_minor + 1; v < current_minor; v++)); do
    list+="${major}.${v}, "
  done
  printf '%s' "${list}${major}.${current_minor}"
}
kubelet_label_filter=" && ! KubeletMinVersion: containsAny { $(kubelet_min_version_denylist "${major}" "${previous_minor}" "${minor}") }"

# Running tests which only cover control plane behavior are not useful
# in a kubelet version skew job. We can filter them out by including
# only tests which have the DynamicResourceAllocation feature because
# only those cover kubelet behavior.
kubelet_label_filter+=" && Feature: containsAny DynamicResourceAllocation"

# Run the DRA E2E tests selected by the label filter with 8 parallel workers.
# ginkgo is started in the background so that its PID can be recorded in
# GINKGO_E2E_PID, the variable which the TERM/INT traps use to forward signals.
KUBECONFIG=${HOME}/.kube/config ${ginkgo} run --nodes=8 --timeout=24h --silence-skips --force-newlines --no-color --label-filter="DRA && Feature: isSubsetOf { OffByDefault, DynamicResourceAllocation } && !FeatureGate:ResourceHealthStatus$kubelet_label_filter && !Alpha && !Flaky && !Slow" ${e2e_test} -- -provider=local -report-dir="${ARTIFACTS}" -report-complete-ginkgo -report-complete-junit &
GINKGO_E2E_PID=$!
# Block until ginkgo is done; its exit status is the status of this command.
# NOTE(review): bash's wait returns early (status > 128) when a trapped signal
# arrives, and with -e the script then exits while ginkgo may still be
# shutting down - confirm that this is acceptable.
wait "${GINKGO_E2E_PID}"
# docker-in-docker needs privileged mode
securityContext:
privileged: true
# Requests are equal to the limits: 2 CPUs and 6Gi of memory.
resources:
limits:
cpu: 2
memory: 6Gi
requests:
cpu: 2
memory: 6Gi

- name: pull-kubernetes-dra-integration-canary
cluster: eks-prow-build-cluster
skip_branches:
Expand Down
117 changes: 117 additions & 0 deletions config/jobs/kubernetes/sig-node/dra-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,123 @@ periodics:
cpu: 2
memory: 6Gi

# Periodic job (every 6h): runs the DRA E2E tests against a kind cluster built
# from the latest CI build, with the worker kubelets replaced by the CI build
# of the release three minor versions back (see the description and the script).
- name: ci-kind-dra-n-3
cluster: eks-prow-build-cluster
interval: 6h
labels:
preset-service-account: "true"
preset-dind-enabled: "true"
preset-kind-volume-mounts: "true"
annotations:
testgrid-dashboards: sig-node-dynamic-resource-allocation
description: Runs E2E tests for Dynamic Resource Allocation beta features against a Kubernetes master cluster created with sigs.k8s.io/kind with kubelet from the "current - 3" release.
testgrid-alert-email: [email protected], [email protected]
# NOTE(review): the fork-per-release-* annotations presumably control how
# per-release copies of this job are generated, including textual replacements
# in the script below - confirm against the test-infra tooling before editing
# the URLs which they match.
fork-per-release: "true"
fork-per-release-periodic-interval: 24h
fork-per-release-replacements: latest-fast.txt -> latest-{{.Version}}.txt, https://dl.k8s.io/ci/fast -> https://dl.k8s.io/ci
decorate: true
decoration_config:
timeout: 90m
spec:
containers:
- image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250815-171060767f-master
command:
- runner.sh
args:
- /bin/bash
# bash options: -x traces each command, -c reads the script from the next
# argument, -e aborts at the first failing command.
- -xce
- |
# Let a pipeline fail when any of its stages fails, not only the last one.
set -o pipefail
# A CI job uses pre-built release artifacts and pulls necessary source files from GitHub.
revision=$(curl --fail --silent --show-error --location https://dl.k8s.io/ci/fast/latest-fast.txt)
# Report what was tested.
echo "{\"revision\":\"$revision\"}" >"${ARTIFACTS}/metadata.json"
# git hash from e.g. v1.33.0-alpha.1.161+e62ce1c9db2dad
# (the expansion removes everything up to and including the last "+").
hash=${revision/*+/}
# Command (kept as an array) which prints the kind configuration of exactly that commit.
kind_yaml_cmd=(curl --fail --silent --show-error --location "https://raw.githubusercontent.com/kubernetes/kubernetes/$hash/test/e2e/dra/kind.yaml")
# Source passed to "kind build node-image": the server tarball of the CI build.
kind_node_source="https://dl.k8s.io/ci/fast/$revision/kubernetes-server-linux-amd64.tar.gz"
# Additional features which get appended to the kind configuration; none at the moment.
features=( )
# Unpack the test tarball of the same build; it contains the two binaries referenced below.
curl --fail --silent --show-error --location "https://dl.k8s.io/ci/fast/$revision/kubernetes-test-linux-amd64.tar.gz" | tar zxvf -
ginkgo=kubernetes/test/bin/ginkgo
e2e_test=kubernetes/test/bin/e2e.test
# The latest kind is assumed to work also for older release branches, should this job get forked.
# ${PATH%%:*} is the first directory listed in PATH; the kind binary is unpacked there.
curl --fail --silent --show-error --location https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" kind
# Build the node image which the cluster gets created from.
kind build node-image --image=dra/node:latest "${kind_node_source}"
GINKGO_E2E_PID=
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -TERM "${GINKGO_E2E_PID}"; fi' TERM
trap 'if [ "${GINKGO_E2E_PID}" ]; then kill -INT "${GINKGO_E2E_PID}"; fi' INT
# The final kind.yaml is assembled from three parts: the original kind.yaml,
# any additional features, and a kubeadm patch appended at the end.
(
  # Print the original kind.yaml. The array is quoted so that each element is
  # passed as exactly one word, without word splitting or globbing.
  "${kind_yaml_cmd[@]}"

  # Additional features are not in kind.yaml, but they can be added at the end.
  # NOTE(review): the leading whitespace of the echoed line has to match the
  # indentation of the feature list at the end of kind.yaml - verify.
  for feature in "${features[@]}"; do echo " ${feature}: true"; done

  # Append ClusterConfiguration which causes etcd to use /tmp
  # (https://github.com/kubernetes-sigs/kind/issues/845#issuecomment-1261248420).
  # There's no kubeadmConfigPatches in any kind.yaml, so we can append at the end.
  # The text below "- |" must be indented: it is the content of a YAML block scalar.
  cat <<EOF
kubeadmConfigPatches:
- |
  kind: ClusterConfiguration
  etcd:
    local:
      dataDir: /tmp/etcd
EOF
) >/tmp/kind.yaml
# Show the generated configuration in the job log.
cat /tmp/kind.yaml
# Exit handler: stores the kind logs as job artifacts, then removes the cluster.
atexit () {
  # Keep going when the export fails so that the cluster still gets deleted.
  kind export logs "${ARTIFACTS}/kind" || echo "warning: exporting the kind logs failed" >&2
  kind delete cluster
}
# The handler is installed before the cluster gets created: --retain keeps the
# nodes around when creation fails, and only the handler exports their logs
# and removes them again.
trap atexit EXIT
kind create cluster --retain --config /tmp/kind.yaml --image dra/node:latest

# Replace the kubelet binary and restart it, as in https://gist.github.com/aojea/2c94034f8e86d08842e5916231eb3fe1
# and https://github.com/kubernetes/test-infra/blob/9cccc25265537e8dfa556688cf10754622014424/experiment/compatibility-versions/emulated-version-upgrade.sh#L56-L66.
#
# The revision has to start with "v<major>.<minor>". Anything else is rejected
# here instead of ending up in the arithmetic and the download URLs below.
version_re='^v([0-9]+)\.([0-9]+)'
if [[ ! "${revision}" =~ ${version_re} ]]; then
  echo "error: unable to parse major.minor from revision '${revision}'" >&2
  exit 1
fi
major=${BASH_REMATCH[1]}
minor=${BASH_REMATCH[2]}
# The kubelet comes from the release which is three minor versions older.
previous_minor=$((minor - 3))
# Test with the most recent CI build, doesn't even need to be released yet.
# We want to know if those are broken.
# --fail makes curl exit non-zero on an HTTP error, so that an error page is
# never mistaken for a version string or fed to tar.
previous=$(curl --fail --silent --show-error --location "https://dl.k8s.io/ci/latest-$major.$previous_minor.txt")
curl --fail --silent --show-error --location "https://dl.k8s.io/ci/$previous/kubernetes-server-linux-amd64.tar.gz" | tar zxOf - kubernetes/server/bin/kubelet >/tmp/kubelet
chmod a+rx /tmp/kubelet
# Log the version of the downloaded kubelet; also proves that the binary runs.
/tmp/kubelet --version
# Copy the downloaded kubelet into every worker node and restart the kubelet there.
# Without any worker node nothing would be tested, so that case is reported explicitly.
if ! worker_nodes=$(kind get nodes | grep worker); then
  echo "error: no worker nodes found in the kind cluster (or 'kind get nodes' failed)" >&2
  exit 1
fi
# One node name per line; an array avoids word splitting and globbing of the names.
mapfile -t worker_node_list <<<"${worker_nodes}"
for n in "${worker_node_list[@]}"; do
  docker cp /tmp/kubelet "${n}:/usr/bin/kubelet"
  docker exec "${n}" systemctl restart kubelet
done

# We need support for disabling tests which need a recent kubelet.
# If a test is labeled with `KubeletMinVersion:1.34`, then it cannot run
# when the deployed kubelet is 1.32. This is enforced by
# generating `! KubeletMinVersion: containsAny { 1.33, 1.34 }`, i.e.
# including all unsupported kubelet versions in a deny list.

#######################################
# Prints the kubelet versions which are newer than the deployed kubelet.
# Arguments: $1 - major version
#            $2 - minor version of the deployed kubelet
#            $3 - minor version of the code under test
# Outputs:   "<major>.<$2 + 1>, ..., <major>.<$3>" on stdout, on a single
#            line and without a trailing newline
#######################################
kubelet_min_version_denylist () {
  local major=$1 deployed_minor=$2 current_minor=$3
  local v list=
  for ((v = deployed_minor + 1; v < current_minor; v++)); do
    list+="${major}.${v}, "
  done
  printf '%s' "${list}${major}.${current_minor}"
}
kubelet_label_filter=" && ! KubeletMinVersion: containsAny { $(kubelet_min_version_denylist "${major}" "${previous_minor}" "${minor}") }"

# Running tests which only cover control plane behavior are not useful
# in a kubelet version skew job. We can filter them out by including
# only tests which have the DynamicResourceAllocation feature because
# only those cover kubelet behavior.
kubelet_label_filter+=" && Feature: containsAny DynamicResourceAllocation"

# Run the DRA E2E tests selected by the label filter with 8 parallel workers.
# ginkgo is started in the background so that its PID can be recorded in
# GINKGO_E2E_PID, the variable which the TERM/INT traps use to forward signals.
KUBECONFIG=${HOME}/.kube/config ${ginkgo} run --nodes=8 --timeout=24h --silence-skips --force-newlines --no-color --label-filter="DRA && Feature: isSubsetOf { OffByDefault, DynamicResourceAllocation } && !FeatureGate:ResourceHealthStatus$kubelet_label_filter && !Alpha && !Flaky" ${e2e_test} -- -provider=local -report-dir="${ARTIFACTS}" -report-complete-ginkgo -report-complete-junit &
GINKGO_E2E_PID=$!
# Block until ginkgo is done; its exit status is the status of this command.
# NOTE(review): bash's wait returns early (status > 128) when a trapped signal
# arrives, and with -e the script then exits while ginkgo may still be
# shutting down - confirm that this is acceptable.
wait "${GINKGO_E2E_PID}"
# docker-in-docker needs privileged mode
securityContext:
privileged: true
# Requests are equal to the limits: 2 CPUs and 6Gi of memory.
resources:
limits:
cpu: 2
memory: 6Gi
requests:
cpu: 2
memory: 6Gi

- name: ci-dra-integration
cluster: eks-prow-build-cluster
interval: 6h
Expand Down
Loading