diff --git a/config/jobs/kubernetes/sig-cloud-provider/gcp/gce-conformance.yaml b/config/jobs/kubernetes/sig-cloud-provider/gcp/gce-conformance.yaml index b1da2bf67c31..b05f857bddb5 100644 --- a/config/jobs/kubernetes/sig-cloud-provider/gcp/gce-conformance.yaml +++ b/config/jobs/kubernetes/sig-cloud-provider/gcp/gce-conformance.yaml @@ -54,7 +54,7 @@ periodics: preset-k8s-ssh: "true" decorate: true decoration_config: - timeout: 220m + timeout: 2h30m extra_refs: - org: kubernetes repo: kubernetes diff --git a/config/jobs/kubernetes/sig-cloud-provider/gcp/gcp-gce.yaml b/config/jobs/kubernetes/sig-cloud-provider/gcp/gcp-gce.yaml index 33542c75b902..6d04bc7bedf6 100644 --- a/config/jobs/kubernetes/sig-cloud-provider/gcp/gcp-gce.yaml +++ b/config/jobs/kubernetes/sig-cloud-provider/gcp/gcp-gce.yaml @@ -1041,7 +1041,7 @@ periodics: preset-k8s-ssh: "true" decorate: true decoration_config: - timeout: 200m + timeout: 1h20m spec: containers: - command: @@ -1054,7 +1054,7 @@ periodics: - --gcp-region=us-central1 - --provider=gce - --test_args=--ginkgo.focus=\[Feature:Reboot\] --minStartupPods=8 - - --timeout=180m + - --timeout=60m image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250815-171060767f-master resources: limits: diff --git a/config/jobs/kubernetes/sig-cloud-provider/gcp/gpu/gpu-gce.yaml b/config/jobs/kubernetes/sig-cloud-provider/gcp/gpu/gpu-gce.yaml index 84318076e8e7..ac97310b4d6c 100644 --- a/config/jobs/kubernetes/sig-cloud-provider/gcp/gpu/gpu-gce.yaml +++ b/config/jobs/kubernetes/sig-cloud-provider/gcp/gpu/gpu-gce.yaml @@ -34,7 +34,7 @@ periodics: testgrid-alert-stale-results-hours: '24' decorate: true decoration_config: - timeout: 300m + timeout: 1h20m spec: containers: - command: @@ -52,7 +52,7 @@ periodics: - --gcp-zone=us-central1-b - --provider=gce - --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8 - - --timeout=180m + - --timeout=60m # TODO: drop this once it's in the defaults - --env=LOG_DUMP_SYSTEMD_SERVICES=containerd image: gcr.io/k8s-staging-test-infra/kubekins-e2e:v20250815-171060767f-master diff --git a/config/jobs/kubernetes/sig-network/sig-network-kind.yaml b/config/jobs/kubernetes/sig-network/sig-network-kind.yaml index b24ee3ea63c8..0b418d4dcc26 100644 --- a/config/jobs/kubernetes/sig-network/sig-network-kind.yaml +++ b/config/jobs/kubernetes/sig-network/sig-network-kind.yaml @@ -858,7 +858,7 @@ periodics: testgrid-tab-name: sig-network-kind, detect-local-interface-name-prefix description: Runs network tests using KIND against latest kubernetes master with a kubernetes-in-docker cluster and kube-proxy detectLocalMode=InterfaceNamePrefix testgrid-alert-email: antonio.ojea.garcia@gmail.com, widaly@microsoft.com -- interval: 12h +- interval: 3h name: ci-kubernetes-kind-cloud-provider-loadbalancer cluster: k8s-infra-prow-build labels: diff --git a/config/jobs/kubernetes/sig-node/crio.yaml b/config/jobs/kubernetes/sig-node/crio.yaml index 21d1c9c9abbc..c3abb3b56eae 100644 --- a/config/jobs/kubernetes/sig-node/crio.yaml +++ b/config/jobs/kubernetes/sig-node/crio.yaml @@ -7,7 +7,7 @@ periodics: preset-k8s-ssh: "true" decorate: true decoration_config: - timeout: 240m + timeout: 2h extra_refs: - org: kubernetes repo: kubernetes @@ -507,7 +507,7 @@ periodics: preset-k8s-ssh: "true" decorate: true decoration_config: - timeout: 240m + timeout: 1h extra_refs: - org: kubernetes repo: kubernetes diff --git a/config/jobs/kubernetes/sig-node/node-kubelet.yaml b/config/jobs/kubernetes/sig-node/node-kubelet.yaml index 6cc5a636beb8..bfc3a9c40236 100644 --- a/config/jobs/kubernetes/sig-node/node-kubelet.yaml +++ b/config/jobs/kubernetes/sig-node/node-kubelet.yaml @@ -1,7 +1,7 @@ periodics: - name: ci-kubernetes-node-e2e-containerd cluster: k8s-infra-prow-build - interval: 4h + interval: 3h labels: preset-service-account: "true" preset-k8s-ssh: "true" diff --git a/config/jobs/kubernetes/sig-release/release-branch-jobs/1.31.yaml b/config/jobs/kubernetes/sig-release/release-branch-jobs/1.31.yaml index 5f707e670332..bb4086b46ba8 100644 --- a/config/jobs/kubernetes/sig-release/release-branch-jobs/1.31.yaml +++ b/config/jobs/kubernetes/sig-release/release-branch-jobs/1.31.yaml @@ -43,7 +43,7 @@ periodics: cluster: k8s-infra-prow-build decorate: true decoration_config: - timeout: 3h40m0s + timeout: 2h30m extra_refs: - base_ref: release-1.31 org: kubernetes @@ -90,7 +90,7 @@ periodics: cron: 0 14-23/24 * * * decorate: true decoration_config: - timeout: 5h0m0s + timeout: 1h20m labels: preset-ci-gce-device-plugin-gpu: "true" preset-k8s-ssh: "true" @@ -108,7 +108,7 @@ periodics: - --gcp-zone=us-central1-b - --provider=gce - --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8 - - --timeout=180m + - --timeout=60m - --env=LOG_DUMP_SYSTEMD_SERVICES=containerd command: - runner.sh diff --git a/config/jobs/kubernetes/sig-release/release-branch-jobs/1.32.yaml b/config/jobs/kubernetes/sig-release/release-branch-jobs/1.32.yaml index 202d17f24d3e..eb21059dde49 100644 --- a/config/jobs/kubernetes/sig-release/release-branch-jobs/1.32.yaml +++ b/config/jobs/kubernetes/sig-release/release-branch-jobs/1.32.yaml @@ -43,7 +43,7 @@ periodics: cluster: k8s-infra-prow-build decorate: true decoration_config: - timeout: 3h40m0s + timeout: 2h30m extra_refs: - base_ref: release-1.32 org: kubernetes @@ -90,7 +90,7 @@ periodics: cron: 0 14-23/24 * * * decorate: true decoration_config: - timeout: 5h0m0s + timeout: 1h20m labels: preset-ci-gce-device-plugin-gpu: "true" preset-k8s-ssh: "true" @@ -108,7 +108,7 @@ periodics: - --gcp-zone=us-central1-b - --provider=gce - --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8 - - --timeout=180m + - --timeout=60m - --env=LOG_DUMP_SYSTEMD_SERVICES=containerd command: - runner.sh diff --git a/config/jobs/kubernetes/sig-release/release-branch-jobs/1.33.yaml b/config/jobs/kubernetes/sig-release/release-branch-jobs/1.33.yaml index 234c5f6d392d..91e50b823ca7 100644 --- a/config/jobs/kubernetes/sig-release/release-branch-jobs/1.33.yaml +++ b/config/jobs/kubernetes/sig-release/release-branch-jobs/1.33.yaml @@ -43,7 +43,7 @@ periodics: cluster: k8s-infra-prow-build decorate: true decoration_config: - timeout: 3h40m0s + timeout: 2h30m extra_refs: - base_ref: release-1.33 org: kubernetes @@ -90,7 +90,7 @@ periodics: cron: 0 8-23/24 * * * decorate: true decoration_config: - timeout: 5h0m0s + timeout: 1h20m labels: preset-ci-gce-device-plugin-gpu: "true" preset-k8s-ssh: "true" @@ -108,7 +108,7 @@ periodics: - --gcp-zone=us-central1-b - --provider=gce - --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8 - - --timeout=180m + - --timeout=60m - --env=LOG_DUMP_SYSTEMD_SERVICES=containerd command: - runner.sh diff --git a/config/jobs/kubernetes/sig-release/release-branch-jobs/1.34.yaml b/config/jobs/kubernetes/sig-release/release-branch-jobs/1.34.yaml index 4ffcad43fe4f..c4c382e72bd8 100644 --- a/config/jobs/kubernetes/sig-release/release-branch-jobs/1.34.yaml +++ b/config/jobs/kubernetes/sig-release/release-branch-jobs/1.34.yaml @@ -43,7 +43,7 @@ periodics: cluster: k8s-infra-prow-build decorate: true decoration_config: - timeout: 3h40m0s + timeout: 2h30m extra_refs: - base_ref: release-1.34 org: kubernetes @@ -90,7 +90,7 @@ periodics: cron: 0 8-23/12 * * * decorate: true decoration_config: - timeout: 5h0m0s + timeout: 1h20m labels: preset-ci-gce-device-plugin-gpu: "true" preset-k8s-ssh: "true" @@ -108,7 +108,7 @@ periodics: - --gcp-zone=us-central1-b - --provider=gce - --test_args=--ginkgo.focus=\[Feature:GPUDevicePlugin\] --minStartupPods=8 - - --timeout=180m + - --timeout=60m - --env=LOG_DUMP_SYSTEMD_SERVICES=containerd command: - runner.sh @@ -1172,7 +1172,7 @@ periodics: org: kubernetes path_alias: k8s.io/kubernetes repo: kubernetes - interval: 6h + interval: 3h labels: preset-dind-enabled: "true" preset-service-account: "true" diff --git a/config/jobs/kubernetes/sig-testing/conformance-e2e.yaml b/config/jobs/kubernetes/sig-testing/conformance-e2e.yaml index 59b4b327e30d..b347eafc70d5 100644 --- a/config/jobs/kubernetes/sig-testing/conformance-e2e.yaml +++ b/config/jobs/kubernetes/sig-testing/conformance-e2e.yaml @@ -145,7 +145,7 @@ periodics: - name: ci-kubernetes-conformance-kind-ga-only cluster: k8s-infra-prow-build - interval: 12h + interval: 3h decorate: true labels: preset-dind-enabled: "true" @@ -155,7 +155,7 @@ periodics: base_ref: master path_alias: k8s.io/kubernetes decoration_config: - timeout: 200m # allow plenty of time for a serial conformance run + timeout: 2h30m spec: containers: - image: gcr.io/k8s-staging-test-infra/krte:v20250815-171060767f-master diff --git a/config/tests/jobs/jobs_test.go b/config/tests/jobs/jobs_test.go index 2293fc68d8aa..892f6e5c04b0 100644 --- a/config/tests/jobs/jobs_test.go +++ b/config/tests/jobs/jobs_test.go @@ -1170,12 +1170,14 @@ func TestClusterName(t *testing.T) { } t.Logf("summary: %4d/%4d jobs fail to meet sig-k8s-infra cluster name policy", jobsToFix, len(jobs)) } + func TestKubernetesReleaseBlockingJobsCIPolicy(t *testing.T) { jobsToFix := 0 - jobs := allStaticJobs() - for _, job := range jobs { + numJobs := len(allStaticJobs()) + + for _, job := range c.AllPeriodics() { // Only consider Pods that are release-blocking - if job.Spec == nil || !isKubernetesReleaseBlocking(job) { + if job.Spec == nil || !isKubernetesReleaseBlocking(job.JobBase) { continue } // job Pod must qualify for Guaranteed QoS @@ -1183,6 +1185,24 @@ func TestKubernetesReleaseBlockingJobsCIPolicy(t *testing.T) { if !isCritical(job.Cluster) { errs = append(errs, fmt.Errorf("must run in cluster: k8s-infra-prow-build or eks-prow-build-cluster, found: %v", job.Cluster)) } + // Allow some buffer over the 120m target in the release blocking job policy: + // "Have the average of 75% percentile duration of all runs for a week finishing in 120 minutes or less" + // "Run at least every 3 hours" + // https://github.com/kubernetes/sig-release/blob/master/release-blocking-jobs.md + if job.DecorationConfig.Timeout.Duration > (time.Hour*2 + time.Minute*30) { + errs = append(errs, fmt.Errorf("release-blocking job must have timeout <= 2h30m and nominally run in <=2h, yet timeout is: %v", job.DecorationConfig.Timeout)) + } + // periodics must run with minimum frequency, but this is reduced on older release branches + branch := kubernetesBranch(job.ExtraRefs) + if branch == "master" || branch == "main" { + // TODO: cron ... + if job.Interval != "" { + interval := job.GetInterval() + if interval > (time.Hour * 3) { + errs = append(errs, fmt.Errorf("release-blocking job must have interval <= 3h, yet interval is: %v", interval)) + } + } + } if len(errs) > 0 { jobsToFix++ } @@ -1190,7 +1210,53 @@ func TestKubernetesReleaseBlockingJobsCIPolicy(t *testing.T) { t.Errorf("%v: %v", job.Name, err) } } - t.Logf("summary: %4d/%4d jobs fail to meet kubernetes/kubernetes release-blocking CI policy", jobsToFix, len(jobs)) + + for repo, postsubmits := range c.PostsubmitsStatic { + for _, job := range postsubmits { + // postsubmits triggering against repos other than kubernetes/kubernetes + // should not be release-blocking + if repo != "kubernetes/kubernetes" { + if job.Spec != nil && isKubernetesReleaseBlocking(job.JobBase) { + t.Errorf("%v: postsubmit should not be release-blocking when it does not trigger against kubernetes/kubernetes", job) + } + continue + } + // only consider release-blocking jobs + if job.Spec == nil || !isKubernetesReleaseBlocking(job.JobBase) { + continue + } + // release blocking jobs must follow policy + // job Pod must qualify for Guaranteed QoS + errs := verifyPodQOSGuaranteed(job.Spec, true) + if !isCritical(job.Cluster) { + errs = append(errs, fmt.Errorf("must run in cluster: k8s-infra-prow-build or eks-prow-build-cluster, found: %v", job.Cluster)) + } + // Allow some buffer over the 120m target in the release blocking job policy: + // "Have the average of 75% percentile duration of all runs for a week finishing in 120 minutes or less" + // "Run at least every 3 hours" + // https://github.com/kubernetes/sig-release/blob/master/release-blocking-jobs.md + if job.DecorationConfig.Timeout.Duration > (time.Hour*2 + time.Minute*30) { + errs = append(errs, fmt.Errorf("release-blocking job must have timeout <= 2h30m and nominally run in <=2h, yet timeout is: %v", job.DecorationConfig.Timeout)) + } + if len(errs) > 0 { + jobsToFix++ + } + for _, err := range errs { + t.Errorf("%v: %v", job.Name, err) + } + } + } + + t.Logf("summary: %4d/%4d jobs fail to meet kubernetes/kubernetes release-blocking CI policy", jobsToFix, numJobs) +} + +func kubernetesBranch(refs []prowapi.Refs) string { + for _, ref := range refs { + if ref.Org == "kubernetes" && ref.Repo == "kubernetes" { + return ref.BaseRef + } + } + return "" } func TestK8sInfraProwBuildJobsCIPolicy(t *testing.T) {