Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ periodics:
preset-k8s-ssh: "true"
decorate: true
decoration_config:
timeout: 220m
timeout: 2h30m
extra_refs:
- org: kubernetes
repo: kubernetes
Expand Down
4 changes: 2 additions & 2 deletions config/jobs/kubernetes/sig-cloud-provider/gcp/gcp-gce.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1041,7 +1041,7 @@ periodics:
preset-k8s-ssh: "true"
decorate: true
decoration_config:
timeout: 200m
timeout: 1h20m
spec:
containers:
- command:
Expand All @@ -1054,7 +1054,7 @@ periodics:
- --gcp-region=us-central1
- --provider=gce
- --test_args=--ginkgo.focus=\[Feature:Reboot\] --minStartupPods=8
- --timeout=180m
- --timeout=60m
image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250815-171060767f-master
resources:
limits:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ periodics:
testgrid-alert-stale-results-hours: '24'
decorate: true
decoration_config:
timeout: 300m
timeout: 1h20m
spec:
containers:
- command:
Expand All @@ -52,7 +52,7 @@ periodics:
- --gcp-zone=us-central1-b
- --provider=gce
- --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8
- --timeout=180m
- --timeout=60m
# TODO: drop this once it's in the defaults
- --env=LOG_DUMP_SYSTEMD_SERVICES=containerd
image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250815-171060767f-master
Expand Down
2 changes: 1 addition & 1 deletion config/jobs/kubernetes/sig-network/sig-network-kind.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@ periodics:
testgrid-tab-name: sig-network-kind, detect-local-interface-name-prefix
description: Runs network tests using KIND against latest kubernetes master with a kubernetes-in-docker cluster and kube-proxy detectLocalMode=InterfaceNamePrefix
testgrid-alert-email: [email protected], [email protected]
- interval: 12h
- interval: 3h
name: ci-kubernetes-kind-cloud-provider-loadbalancer
cluster: k8s-infra-prow-build
labels:
Expand Down
4 changes: 2 additions & 2 deletions config/jobs/kubernetes/sig-node/crio.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ periodics:
preset-k8s-ssh: "true"
decorate: true
decoration_config:
timeout: 240m
timeout: 2h
extra_refs:
- org: kubernetes
repo: kubernetes
Expand Down Expand Up @@ -507,7 +507,7 @@ periodics:
preset-k8s-ssh: "true"
decorate: true
decoration_config:
timeout: 240m
timeout: 1h
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious why this one got 1h? The one above has 2h, and that's within the limit.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The commit messages mention this for each change: we don't want to set high timeouts when we don't need to, and these jobs are running well within 1h. If they start to take longer, that's a red flag.

Copy link
Member Author

@BenTheElder BenTheElder Aug 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only set them to the maximum to avoid regressing them. In general these jobs tend to have excessively high timeouts, which will mask failure modes. If a job suddenly goes from <1h to 2h that's almost certainly an excessive retry/timeout that will lead to failure or a massive regression.

extra_refs:
- org: kubernetes
repo: kubernetes
Expand Down
2 changes: 1 addition & 1 deletion config/jobs/kubernetes/sig-node/node-kubelet.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
periodics:
- name: ci-kubernetes-node-e2e-containerd
cluster: k8s-infra-prow-build
interval: 4h
interval: 3h
labels:
preset-service-account: "true"
preset-k8s-ssh: "true"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ periodics:
cluster: k8s-infra-prow-build
decorate: true
decoration_config:
timeout: 3h40m0s
timeout: 2h30m
extra_refs:
- base_ref: release-1.31
org: kubernetes
Expand Down Expand Up @@ -90,7 +90,7 @@ periodics:
cron: 0 14-23/24 * * *
decorate: true
decoration_config:
timeout: 5h0m0s
timeout: 1h20m
labels:
preset-ci-gce-device-plugin-gpu: "true"
preset-k8s-ssh: "true"
Expand All @@ -108,7 +108,7 @@ periodics:
- --gcp-zone=us-central1-b
- --provider=gce
- --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8
- --timeout=180m
- --timeout=60m
- --env=LOG_DUMP_SYSTEMD_SERVICES=containerd
command:
- runner.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ periodics:
cluster: k8s-infra-prow-build
decorate: true
decoration_config:
timeout: 3h40m0s
timeout: 2h30m
extra_refs:
- base_ref: release-1.32
org: kubernetes
Expand Down Expand Up @@ -90,7 +90,7 @@ periodics:
cron: 0 14-23/24 * * *
decorate: true
decoration_config:
timeout: 5h0m0s
timeout: 1h20m
labels:
preset-ci-gce-device-plugin-gpu: "true"
preset-k8s-ssh: "true"
Expand All @@ -108,7 +108,7 @@ periodics:
- --gcp-zone=us-central1-b
- --provider=gce
- --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8
- --timeout=180m
- --timeout=60m
- --env=LOG_DUMP_SYSTEMD_SERVICES=containerd
command:
- runner.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ periodics:
cluster: k8s-infra-prow-build
decorate: true
decoration_config:
timeout: 3h40m0s
timeout: 2h30m
extra_refs:
- base_ref: release-1.33
org: kubernetes
Expand Down Expand Up @@ -90,7 +90,7 @@ periodics:
cron: 0 8-23/24 * * *
decorate: true
decoration_config:
timeout: 5h0m0s
timeout: 1h20m
labels:
preset-ci-gce-device-plugin-gpu: "true"
preset-k8s-ssh: "true"
Expand All @@ -108,7 +108,7 @@ periodics:
- --gcp-zone=us-central1-b
- --provider=gce
- --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8
- --timeout=180m
- --timeout=60m
- --env=LOG_DUMP_SYSTEMD_SERVICES=containerd
command:
- runner.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ periodics:
cluster: k8s-infra-prow-build
decorate: true
decoration_config:
timeout: 3h40m0s
timeout: 2h30m
extra_refs:
- base_ref: release-1.34
org: kubernetes
Expand Down Expand Up @@ -90,7 +90,7 @@ periodics:
cron: 0 8-23/12 * * *
decorate: true
decoration_config:
timeout: 5h0m0s
timeout: 1h20m
labels:
preset-ci-gce-device-plugin-gpu: "true"
preset-k8s-ssh: "true"
Expand All @@ -108,7 +108,7 @@ periodics:
- --gcp-zone=us-central1-b
- --provider=gce
- --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8
- --timeout=180m
- --timeout=60m
- --env=LOG_DUMP_SYSTEMD_SERVICES=containerd
command:
- runner.sh
Expand Down Expand Up @@ -1172,7 +1172,7 @@ periodics:
org: kubernetes
path_alias: k8s.io/kubernetes
repo: kubernetes
interval: 6h
interval: 3h
labels:
preset-dind-enabled: "true"
preset-service-account: "true"
Expand Down
4 changes: 2 additions & 2 deletions config/jobs/kubernetes/sig-testing/conformance-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ periodics:

- name: ci-kubernetes-conformance-kind-ga-only
cluster: k8s-infra-prow-build
interval: 12h
interval: 3h
decorate: true
labels:
preset-dind-enabled: "true"
Expand All @@ -155,7 +155,7 @@ periodics:
base_ref: master
path_alias: k8s.io/kubernetes
decoration_config:
timeout: 200m # allow plenty of time for a serial conformance run
timeout: 2h30m
spec:
containers:
- image: gcr.io/k8s-staging-test-infra/krte:v20250815-171060767f-master
Expand Down
74 changes: 70 additions & 4 deletions config/tests/jobs/jobs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1170,27 +1170,93 @@ func TestClusterName(t *testing.T) {
}
t.Logf("summary: %4d/%4d jobs fail to meet sig-k8s-infra cluster name policy", jobsToFix, len(jobs))
}

// TestKubernetesReleaseBlockingJobsCIPolicy reports every release-blocking
// job that violates the kubernetes/kubernetes release-blocking CI policy:
// the job Pod must pass verifyPodQOSGuaranteed, the job must run in a cluster
// accepted by isCritical, and its decoration timeout must not exceed 2h30m.
// Periodics whose kubernetes/kubernetes ref is master/main must additionally
// have an interval of at most 3h. One t.Errorf is emitted per violation and a
// summary line is logged at the end.
func TestKubernetesReleaseBlockingJobsCIPolicy(t *testing.T) {
jobsToFix := 0
jobs := allStaticJobs()
for _, job := range jobs {
numJobs := len(allStaticJobs())

for _, job := range c.AllPeriodics() {
// Only consider Pods that are release-blocking
if job.Spec == nil || !isKubernetesReleaseBlocking(job) {
if job.Spec == nil || !isKubernetesReleaseBlocking(job.JobBase) {
continue
}
// job Pod must qualify for Guaranteed QoS
errs := verifyPodQOSGuaranteed(job.Spec, true)
if !isCritical(job.Cluster) {
errs = append(errs, fmt.Errorf("must run in cluster: k8s-infra-prow-build or eks-prow-build-cluster, found: %v", job.Cluster))
}
// Allow some buffer over the 120m target in the release blocking job policy:
// "Have the average of 75% percentile duration of all runs for a week finishing in 120 minutes or less"
// "Run at least every 3 hours"
// https://github.com/kubernetes/sig-release/blob/master/release-blocking-jobs.md
// NOTE(review): DecorationConfig is dereferenced without a nil check here;
// confirm every release-blocking job is decorated, otherwise this panics.
if job.DecorationConfig.Timeout.Duration > (time.Hour*2 + time.Minute*30) {
errs = append(errs, fmt.Errorf("release-blocking job must have timeout <= 2h30m and nominally run in <=2h, yet timeout is: %v", job.DecorationConfig.Timeout))
}
// periodics must run with minimum frequency, but this is reduced on older release branches
branch := kubernetesBranch(job.ExtraRefs)
if branch == "master" || branch == "main" {
// TODO: cron ...
// Only jobs that set Interval are checked; jobs with an empty Interval
// (presumably the cron-scheduled ones, per the TODO) are skipped for now.
if job.Interval != "" {
interval := job.GetInterval()
if interval > (time.Hour * 3) {
errs = append(errs, fmt.Errorf("release-blocking job must have interval <= 3h, yet interval is: %v", interval))
}
}
}
// Count each offending job once, however many violations it has.
if len(errs) > 0 {
jobsToFix++
}
for _, err := range errs {
t.Errorf("%v: %v", job.Name, err)
}
}
t.Logf("summary: %4d/%4d jobs fail to meet kubernetes/kubernetes release-blocking CI policy", jobsToFix, len(jobs))

// Postsubmits get the same QoS, cluster and timeout checks as periodics,
// but no interval check.
for repo, postsubmits := range c.PostsubmitsStatic {
for _, job := range postsubmits {
// postsubmits triggering against repos other than kubernetes/kubernetes
// should not be release-blocking
if repo != "kubernetes/kubernetes" {
if job.Spec != nil && isKubernetesReleaseBlocking(job.JobBase) {
// NOTE(review): this formats the whole job value, while the other
// messages in this test use job.Name; confirm that is intended.
t.Errorf("%v: postsubmit should not be release-blocking when it does not trigger against kubernetes/kubernetes", job)
}
continue
}
// only consider release-blocking jobs
if job.Spec == nil || !isKubernetesReleaseBlocking(job.JobBase) {
continue
}
// release blocking jobs must follow policy
// job Pod must qualify for Guaranteed QoS
errs := verifyPodQOSGuaranteed(job.Spec, true)
if !isCritical(job.Cluster) {
errs = append(errs, fmt.Errorf("must run in cluster: k8s-infra-prow-build or eks-prow-build-cluster, found: %v", job.Cluster))
}
// Allow some buffer over the 120m target in the release blocking job policy:
// "Have the average of 75% percentile duration of all runs for a week finishing in 120 minutes or less"
// "Run at least every 3 hours"
// https://github.com/kubernetes/sig-release/blob/master/release-blocking-jobs.md
// NOTE(review): same unguarded DecorationConfig dereference as in the
// periodics loop; confirm it can never be nil for these jobs.
if job.DecorationConfig.Timeout.Duration > (time.Hour*2 + time.Minute*30) {
errs = append(errs, fmt.Errorf("release-blocking job must have timeout <= 2h30m and nominally run in <=2h, yet timeout is: %v", job.DecorationConfig.Timeout))
}
if len(errs) > 0 {
jobsToFix++
}
for _, err := range errs {
t.Errorf("%v: %v", job.Name, err)
}
}
}

t.Logf("summary: %4d/%4d jobs fail to meet kubernetes/kubernetes release-blocking CI policy", jobsToFix, numJobs)
}

// kubernetesBranch returns the BaseRef of the first entry in refs whose Org
// and Repo are both "kubernetes". It returns the empty string when refs has
// no such entry (including when refs is nil or empty).
func kubernetesBranch(refs []prowapi.Refs) string {
	for i := range refs {
		// Skip anything that is not the kubernetes/kubernetes repository.
		if refs[i].Org != "kubernetes" || refs[i].Repo != "kubernetes" {
			continue
		}
		return refs[i].BaseRef
	}
	return ""
}

func TestK8sInfraProwBuildJobsCIPolicy(t *testing.T) {
Expand Down