diff --git a/cluster/manifests/skipper/skipper-redis.yaml b/cluster/manifests/skipper/skipper-redis.yaml
index 1ff5f8cfff..4324282fc7 100644
--- a/cluster/manifests/skipper/skipper-redis.yaml
+++ b/cluster/manifests/skipper/skipper-redis.yaml
@@ -22,6 +22,7 @@ spec:
         version: v7.2.4
       annotations:
         cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
+        karpenter.sh/do-not-disrupt: "true"
         logging/destination: "{{ .Cluster.ConfigItems.log_destination_both }}"
 {{- if eq .Cluster.ConfigItems.skipper_topology_spread_enabled "true" }}
         zalando.org/topology-spread-timeout: 7m
@@ -30,6 +31,7 @@ spec:
 {{- if eq .Cluster.ConfigItems.skipper_topology_spread_enabled "true" }}
       topologySpreadConstraints:
       - maxSkew: 1
+        minDomains: 3
         topologyKey: topology.kubernetes.io/zone
         whenUnsatisfiable: DoNotSchedule
         labelSelector:
@@ -46,6 +48,24 @@ spec:
                 operator: In
                 values:
                 - skipper-ingress-redis
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+          - weight: 100
+            preference:
+              matchExpressions:
+              - key: node.kubernetes.io/instance-type
+                operator: In
+                values:
+                - c7i.large
+                - m7i.large
+          - weight: 50
+            preference:
+              matchExpressions:
+              - key: node.kubernetes.io/instance-type
+                operator: In
+                values:
+                - c6i.large
+                - m6i.large
       priorityClassName: "{{ .Cluster.ConfigItems.system_priority_class }}"
       terminationGracePeriodSeconds: 45
       containers:
@@ -81,9 +101,9 @@ spec:
       schedulerName: default-scheduler
 {{ if eq .Cluster.ConfigItems.enable_dedicate_nodepool_skipper_redis "true"}}
       nodeSelector:
-        dedicated: skipper-ingress-redis
+        zalando.org/dedicated: skipper-ingress-redis
       tolerations:
-      - effect: NoSchedule
-        key: dedicated
-        value: skipper-ingress-redis
+      - key: "zalando.org/dedicated"
+        operator: Exists
+        effect: NoSchedule
 {{ end }}
diff --git a/test/e2e/cluster_config.sh b/test/e2e/cluster_config.sh
index 508b2b186d..ffdeb24339 100755
--- a/test/e2e/cluster_config.sh
+++ b/test/e2e/cluster_config.sh
@@ -164,7 +164,7 @@ EOFF
     - "g6.xlarge"
     - "g6.2xlarge"
     - "g6.4xlarge"
-    name: karpenter-gpu
+    name: karpenter-gpu-tesla
     profile: worker-karpenter
     min_size: 0
     max_size: 0
@@ -181,6 +181,52 @@ EOFF
     config_items:
       labels: dedicated=node-reboot-tests
       taints: dedicated=node-reboot-tests:NoSchedule
+  - config_items:
+      requirements: "- key: karpenter.k8s.aws/instance-gpu-manufacturer\n  operator: In\n  values:\n  - nvidia\n- key: zalando.org/dedicated\n  operator: Exists\n"
+      scaling_priority: "2"
+      taints: nvidia.com/gpu=present:NoSchedule,zalando.org/dedicated=dedicated:NoSchedule
+    discount_strategy: none
+    instance_type: not-specified
+    instance_types:
+    - not-specified
+    max_size: 0
+    min_size: 0
+    name: karpenter-gpu-dedicated
+    profile: worker-karpenter
+  - config_items:
+      requirements: "- key: zalando.org/dedicated\n  operator: Exists\n"
+      scaling_priority: "1"
+      taints: zalando.org/dedicated=dedicated:NoSchedule
+    discount_strategy: none
+    instance_type: not-specified
+    instance_types:
+    - not-specified
+    max_size: 0
+    min_size: 0
+    name: karpenter-catch-all-dedicated
+    profile: worker-karpenter
+  - config_items:
+      requirements: "- key: karpenter.k8s.aws/instance-gpu-manufacturer\n  operator: In\n  values:\n  - nvidia\n"
+      scaling_priority: "3"
+      taints: nvidia.com/gpu=present:NoSchedule
+    discount_strategy: none
+    instance_type: not-specified
+    instance_types:
+    - not-specified
+    max_size: 0
+    min_size: 0
+    name: karpenter-gpu
+    profile: worker-karpenter
+  - config_items:
+      scaling_priority: "2"
+    discount_strategy: none
+    instance_type: not-specified
+    instance_types:
+    - not-specified
+    max_size: 0
+    min_size: 0
+    name: karpenter-catch-all
+    profile: worker-karpenter
   provider: ${CLUSTER_PROVIDER}
   region: ${REGION}
   owner: '${OWNER}'