Skip to content

Commit d91d92f

Browse files
[CI] Use Grace Periods instead of PDBs
GKE apparently does not like to upgrade the cluster with how we were attempting to use PDBs. Switch over to grace periods specified per pod, which is probably closer to what we actually intend anyway. This might kill a still-running job, but that should be pretty rare, given that the vast majority of jobs complete in under an hour and that our maintenance windows are set to run only during the deep troughs of demand.
1 parent f445f1b commit d91d92f

File tree

5 files changed

+3
-40
lines changed

5 files changed

+3
-40
lines changed

premerge/libcxx_runners_values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ template:
1111
annotations:
1212
cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
1313
spec:
14+
terminationGracePeriodSeconds: 3600
1415
tolerations:
1516
- key: "premerge-platform-libcxx"
1617
operator: "Equal"

premerge/linux_runners_values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ template:
1212
cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
1313
spec:
1414
serviceAccountName: linux-runners-ksa
15+
terminationGracePeriodSeconds: 3600
1516
tolerations:
1617
- key: "premerge-platform"
1718
operator: "Equal"

premerge/pod_disruption_budget.yaml

Lines changed: 0 additions & 10 deletions
This file was deleted.

premerge/premerge_resources/main.tf

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -458,36 +458,6 @@ resource "kubernetes_service_account" "windows_2022_object_cache_ksa" {
458458
depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_runners]
459459
}
460460

461-
# We set up pod disruption budgets here. We need one per namespace and we need
462-
# to set the min pod count to the maximum number of runner pods that can
463-
# possibly exist so we never have a number of disruptible pods greater than
464-
# zero.
465-
466-
resource "kubernetes_manifest" "linux_runners_disruption_budget" {
467-
manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-linux-runners", min_pod_count : 16 }))
468-
depends_on = [kubernetes_namespace.llvm_premerge_linux_runners]
469-
}
470-
471-
resource "kubernetes_manifest" "windows_2022_runners_disruption_budget" {
472-
manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-windows-2022-runners", min_pod_count : 16 }))
473-
depends_on = [kubernetes_namespace.llvm_premerge_windows_2022_runners]
474-
}
475-
476-
resource "kubernetes_manifest" "libcxx_runners_disruption_budget" {
477-
manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-libcxx-runners", min_pod_count : 32 }))
478-
depends_on = [kubernetes_namespace.llvm_premerge_libcxx_runners]
479-
}
480-
481-
resource "kubernetes_manifest" "libcxx_release_runners_disruption_budget" {
482-
manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-libcxx-release-runners", min_pod_count : 32 }))
483-
depends_on = [kubernetes_namespace.llvm_premerge_libcxx_release_runners]
484-
}
485-
486-
resource "kubernetes_manifest" "libcxx_next_runners_disruption_budget" {
487-
manifest = yamldecode(templatefile("pod_disruption_budget.yaml", { runner_set_name : "llvm-premerge-libcxx-next-runners", min_pod_count : 32 }))
488-
depends_on = [kubernetes_namespace.llvm_premerge_libcxx_next_runners]
489-
}
490-
491461
resource "kubernetes_namespace" "premerge_advisor" {
492462
metadata {
493463
name = "premerge-advisor"

premerge/windows_2022_runner_values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ template:
1212
cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
1313
spec:
1414
serviceAccountName: windows-runners-ksa
15+
terminationGracePeriodSeconds: 3600
1516
tolerations:
1617
- key: "node.kubernetes.io/os"
1718
operator: "Equal"

0 commit comments

Comments
 (0)