Skip to content

Commit 5f852d6

Browse files
author
nwickramasin
committed
Merge branch 'dev' into opa-e2e-tests
2 parents e516156 + d77893b commit 5f852d6

File tree

21 files changed

+253
-26
lines changed

21 files changed

+253
-26
lines changed

cluster/config-defaults.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,11 @@ nvidia_dcgm_exporter_enabled: "false"
427427
nvidia_dcgm_exporter_cpu: "10m"
428428
nvidia_dcgm_exporter_memory: "200Mi"
429429

430+
# AWS EFA device plugin
431+
aws_efa_device_plugin_enabled: "false"
432+
aws_efa_device_plugin_cpu: "10m"
433+
aws_efa_device_plugin_memory: "20Mi"
434+
430435
# static egress controller settings
431436
static_egress_controller_enabled: "true"
432437

@@ -1147,6 +1152,16 @@ control_plane_graceful_shutdown: "true"
11471152
# For rolling back it needs to be done in multiple stages: active -> serving -> pre -> none
11481153
control_plane_load_balancer_internal: "none"
11491154

1155+
# Optionally use internal subnets for running the nodes. This can be configured
1156+
# at a node pool level to only run a subset of nodes in the internal subnets.
1157+
# If this is true then `associate_public_ip_on_launch` is automatically treated
1158+
# as false.
1159+
internal_node_subnets_enabled: "false"
1160+
1161+
# Configure whether to associate a public IP when launching instances.
1162+
# This is only relevant when `internal_node_subnets_enabled` is false.
1163+
associate_public_ip_on_launch: "true"
1164+
11501165
# This allows setting custom sysctl settings. The config-item is intended to be
11511166
# used on node-pools rather than being set globally.
11521167
#
@@ -1187,6 +1202,13 @@ kube_janitor_enabled: "true"
11871202
teapot_admission_controller_scheduling_controls_enabled: "false"
11881203
teapot_admission_controller_scheduling_controls_default_architecture: "amd64"
11891204

1205+
# master-node-autoscaler configuration
1206+
1207+
# Ignore recommendations for downgrading the instance generation of the master nodes.
1208+
# This is required to persist the upgrade of instance generation for master nodes
1209+
# in a cluster.
1210+
master_node_autoscaler_instance_generation_downgrade_disabled: "false"
1211+
11901212
# role-sync-controller configs
11911213
# Enabled by default only on Zalando EKS clusters
11921214
{{ if eq .Cluster.Provider "zalando-eks" }}

cluster/manifests/01-admission-control/config.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ data:
2929
pod.service-account-iam.enable: "true"
3030
pod.service-account-iam.base-aws-account-id: "{{ accountID .Cluster.InfrastructureAccount }}"
3131
{{- if eq .Cluster.ConfigItems.teapot_admission_controller_inject_aws_waiter "true" }}
32-
pod.aws-waiter.image: "926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/automata/aws-credentials-waiter:master-234"
32+
pod.aws-waiter.image: "926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/automata/aws-credentials-waiter:master-246"
3333
{{- end }}
3434
pod.env-inject.enable: "{{ .Cluster.ConfigItems.teapot_admission_controller_inject_environment_variables }}"
3535
pod.env-inject.variable._PLATFORM_ACCOUNT: "{{ .Cluster.Alias }}"
@@ -107,6 +107,9 @@ data:
107107
pod.pod-security-policy.privileged-service-accounts.kube-system_efs-provisioner: ""
108108
{{- if eq .Cluster.ConfigItems.s3_csi_driver "true" }}
109109
pod.pod-security-policy.privileged-service-accounts.kube-system_s3-csi-driver: ""
110+
{{- end }}
111+
{{- if eq .Cluster.ConfigItems.aws_efa_device_plugin_enabled "true" }}
112+
pod.pod-security-policy.privileged-service-accounts.kube-system_aws-efa-k8s-device-plugin: ""
110113
{{- end }}
111114
pod.pod-security-policy.privileged-service-accounts.visibility_logging-agent: ""
112115
{{- range $sa := split .Cluster.ConfigItems.teapot_admission_controller_pod_security_policy_privileged_service_accounts "," }}

cluster/manifests/audittrail-adapter/daemonset.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ spec:
3333
hostNetwork: true
3434
containers:
3535
- name: audittrail-adapter
36-
image: container-registry.zalando.net/teapot/audittrail-adapter:master-71
36+
image: container-registry.zalando.net/teapot/audittrail-adapter:master-72
3737
env:
3838
- name: AWS_REGION
3939
value: "{{ .Cluster.Region }}"

cluster/manifests/aws-cloud-controller-manager/daemonset.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ spec:
2727
- --cloud-provider=aws
2828
- --use-service-account-credentials=true
2929
- --configure-cloud-routes=false
30-
image: container-registry.zalando.net/teapot/aws-cloud-controller-manager-internal:v1.31.4-master-136
30+
image: container-registry.zalando.net/teapot/aws-cloud-controller-manager-internal:v1.31.4-master-137
3131
name: aws-cloud-controller-manager
3232
resources:
3333
requests:
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
{{ if eq .Cluster.ConfigItems.aws_efa_device_plugin_enabled "true" }}
2+
# source: https://github.com/aws/eks-charts/blob/master/stable/aws-efa-k8s-device-plugin/templates/daemonset.yaml
3+
apiVersion: apps/v1
4+
kind: DaemonSet
5+
metadata:
6+
name: aws-efa-k8s-device-plugin
7+
namespace: kube-system
8+
labels:
9+
application: kubernetes
10+
component: aws-efa-k8s-device-plugin
11+
spec:
12+
selector:
13+
matchLabels:
14+
daemonset: aws-efa-k8s-device-plugin
15+
updateStrategy:
16+
type: RollingUpdate
17+
template:
18+
metadata:
19+
labels:
20+
daemonset: aws-efa-k8s-device-plugin
21+
application: kubernetes
22+
component: aws-efa-k8s-device-plugin
23+
annotations:
24+
logging/destination: "{{.Cluster.ConfigItems.log_destination_infra}}"
25+
spec:
26+
serviceAccountName: aws-efa-k8s-device-plugin
27+
tolerations:
28+
- operator: Exists
29+
effect: NoExecute
30+
- operator: Exists
31+
effect: NoSchedule
32+
# Mark this pod as a critical add-on; when enabled, the critical add-on
33+
# scheduler reserves resources for critical add-on pods so that they can
34+
# be rescheduled after a failure.
35+
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
36+
priorityClassName: "system-node-critical"
37+
affinity:
38+
nodeAffinity:
39+
requiredDuringSchedulingIgnoredDuringExecution:
40+
nodeSelectorTerms:
41+
- matchExpressions:
42+
- key: node.kubernetes.io/instance-type
43+
operator: In
44+
values:
45+
- m5dn.24xlarge
46+
- m5n.24xlarge
47+
- m5zn.12xlarge
48+
- m6a.48xlarge
49+
- m6i.32xlarge
50+
- m6id.32xlarge
51+
- m6idn.32xlarge
52+
- m6in.32xlarge
53+
- m7a.48xlarge
54+
- m7g.16xlarge
55+
- m7gd.16xlarge
56+
- m7i.48xlarge
57+
- c5n.9xlarge
58+
- c5n.18xlarge
59+
- c6a.48xlarge
60+
- c6gn.16xlarge
61+
- c6i.32xlarge
62+
- c6id.32xlarge
63+
- c6in.32xlarge
64+
- c7a.48xlarge
65+
- c7g.16xlarge
66+
- c7gd.16xlarge
67+
- c7gn.16xlarge
68+
- c7i.48xlarge
69+
- r5dn.24xlarge
70+
- r5n.24xlarge
71+
- r6a.48xlarge
72+
- r6i.32xlarge
73+
- r6idn.32xlarge
74+
- r6in.32xlarge
75+
- r6id.32xlarge
76+
- r7a.48xlarge
77+
- r7g.16xlarge
78+
- r7gd.16xlarge
79+
- r7i.48xlarge
80+
- r7iz.32xlarge
81+
- x2idn.32xlarge
82+
- x2iedn.32xlarge
83+
- x2iezn.12xlarge
84+
- i3en.12xlarge
85+
- i3en.24xlarge
86+
- i4g.16xlarge
87+
- i4i.32xlarge
88+
- im4gn.16xlarge
89+
- dl1.24xlarge
90+
- dl2q.24xlarge
91+
- g4dn.8xlarge
92+
- g4dn.12xlarge
93+
- g4dn.16xlarge
94+
- g5.8xlarge
95+
- g5.12xlarge
96+
- g5.16xlarge
97+
- g5.24xlarge
98+
- g5.48xlarge
99+
- g6.8xlarge
100+
- g6.12xlarge
101+
- g6.16xlarge
102+
- g6.24xlarge
103+
- g6.48xlarge
104+
- g6e.8xlarge
105+
- g6e.12xlarge
106+
- g6e.16xlarge
107+
- g6e.24xlarge
108+
- g6e.48xlarge
109+
- gr6.8xlarge
110+
- inf1.24xlarge
111+
- p3dn.24xlarge
112+
- p4d.24xlarge
113+
- p4de.24xlarge
114+
- p5.48xlarge
115+
- p5e.48xlarge
116+
- p5en.48xlarge
117+
- trn1.32xlarge
118+
- trn1n.32xlarge
119+
- trn2.48xlarge
120+
- vt1.24xlarge
121+
- hpc6a.48xlarge
122+
- hpc6id.32xlarge
123+
- hpc7a.12xlarge
124+
- hpc7a.24xlarge
125+
- hpc7a.48xlarge
126+
- hpc7a.96xlarge
127+
- hpc7g.4xlarge
128+
- hpc7g.8xlarge
129+
- hpc7g.16xlarge
130+
hostNetwork: true
131+
containers:
132+
- image: container-registry.zalando.net/teapot/aws-efa-k8s-device-plugin:v0.5.4-main-1
133+
name: aws-efa-k8s-device-plugin
134+
securityContext:
135+
allowPrivilegeEscalation: false
136+
capabilities:
137+
drop:
138+
- ALL
139+
runAsNonRoot: false
140+
resources:
141+
requests:
142+
cpu: "{{ .Cluster.ConfigItems.aws_efa_device_plugin_cpu }}"
143+
memory: "{{ .Cluster.ConfigItems.aws_efa_device_plugin_memory }}"
144+
limits:
145+
cpu: "{{ .Cluster.ConfigItems.aws_efa_device_plugin_cpu }}"
146+
memory: "{{ .Cluster.ConfigItems.aws_efa_device_plugin_memory }}"
147+
volumeMounts:
148+
- name: device-plugin
149+
mountPath: /var/lib/kubelet/device-plugins
150+
- name: infiniband-volume
151+
mountPath: /dev/infiniband/
152+
volumes:
153+
- name: device-plugin
154+
hostPath:
155+
path: /var/lib/kubelet/device-plugins
156+
- name: infiniband-volume
157+
hostPath:
158+
path: /dev/infiniband/
159+
{{ end }}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{{ if eq .Cluster.ConfigItems.aws_efa_device_plugin_enabled "true" }}
2+
apiVersion: v1
3+
kind: ServiceAccount
4+
metadata:
5+
name: aws-efa-k8s-device-plugin
6+
namespace: kube-system
7+
labels:
8+
application: kubernetes
9+
component: aws-efa-k8s-device-plugin
10+
{{ end }}

cluster/manifests/deletions.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,11 @@ post_apply:
339339
- name: kube-janitor
340340
kind: ClusterRoleBinding
341341
{{- end }}
342+
{{- if ne .Cluster.ConfigItems.aws_efa_device_plugin_enabled "true"}}
343+
- name: aws-efa-k8s-device-plugin
344+
kind: DaemonSet
345+
namespace: kube-system
346+
- name: aws-efa-k8s-device-plugin
347+
kind: ServiceAccount
348+
namespace: kube-system
349+
{{- end}}

cluster/manifests/deployment-service/01-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ metadata:
55
namespace: kube-system
66
data:
77
aws-account-id: "{{accountID .Cluster.InfrastructureAccount}}"
8+
aws-account-name: "{{.Cluster.AccountName}}"
89
cluster-alias: "{{.Cluster.Alias}}"
910
cluster-vpc-id: "{{.Cluster.ConfigItems.vpc_id}}"
1011
scalyr-team-token: "{{.Cluster.ConfigItems.scalyr_team_token}}"

cluster/manifests/deployment-service/controller-statefulset.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ spec:
2929
terminationGracePeriodSeconds: 300
3030
containers:
3131
- name: "deployment-service-controller"
32-
image: "container-registry.zalando.net/teapot/deployment-controller:master-235"
32+
image: "container-registry.zalando.net/teapot/deployment-controller:master-236"
3333
args:
3434
- "--config-namespace=kube-system"
3535
- "--decrypt-kms-alias-arn=arn:aws:kms:{{ .Cluster.Region }}:{{ .Cluster.InfrastructureAccount | getAWSAccountID }}:alias/deployment-secret"

cluster/manifests/deployment-service/status-service-deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# {{ $image := "container-registry.zalando.net/teapot/deployment-status-service:master-235" }}
1+
# {{ $image := "container-registry.zalando.net/teapot/deployment-status-service:master-236" }}
22
# {{ $version := index (split $image ":") 1 }}
33

44
apiVersion: apps/v1

0 commit comments

Comments
 (0)