Skip to content

Commit 62cc974

Browse files
authored
Merge pull request #8692 from zalando-incubator/efa-plugin
add aws-efa-device-plugin manifests
2 parents 6441e90 + c6ed42b commit 62cc974

File tree

5 files changed

+185
-0
lines changed

5 files changed

+185
-0
lines changed

cluster/config-defaults.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,11 @@ nvidia_dcgm_exporter_enabled: "false"
427427
nvidia_dcgm_exporter_cpu: "10m"
428428
nvidia_dcgm_exporter_memory: "200Mi"
429429

430+
# AWS EFA device plugin
# Defaults for the aws-efa-k8s-device-plugin DaemonSet in kube-system.
431+
# Feature toggle. The manifest templates compare it against the string "true";
# any other value leaves the plugin out and lists its DaemonSet and
# ServiceAccount in deletions.yaml.
aws_efa_device_plugin_enabled: "false"
432+
# CPU for the plugin container; the DaemonSet uses it as both request and limit.
aws_efa_device_plugin_cpu: "10m"
433+
# Memory for the plugin container; the DaemonSet uses it as both request and limit.
aws_efa_device_plugin_memory: "20Mi"
434+
430435
# static egress controller settings
431436
static_egress_controller_enabled: "true"
432437

cluster/manifests/01-admission-control/config.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ data:
107107
# Privileged service account entries. Each key appears to follow the pattern
# <prefix>.<namespace>_<service-account> - TODO confirm against the admission controller.
pod.pod-security-policy.privileged-service-accounts.kube-system_efs-provisioner: ""
108108
{{- if eq .Cluster.ConfigItems.s3_csi_driver "true" }}
109109
pod.pod-security-policy.privileged-service-accounts.kube-system_s3-csi-driver: ""
110+
{{- end }}
111+
# Only emitted when the EFA device plugin is enabled. The name matches the
# serviceAccountName used by the aws-efa-k8s-device-plugin DaemonSet.
{{- if eq .Cluster.ConfigItems.aws_efa_device_plugin_enabled "true" }}
112+
pod.pod-security-policy.privileged-service-accounts.kube-system_aws-efa-k8s-device-plugin: ""
110113
{{- end }}
111114
pod.pod-security-policy.privileged-service-accounts.visibility_logging-agent: ""
112115
{{- range $sa := split .Cluster.ConfigItems.teapot_admission_controller_pod_security_policy_privileged_service_accounts "," }}
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
{{ if eq .Cluster.ConfigItems.aws_efa_device_plugin_enabled "true" }}
2+
# DaemonSet for the AWS EFA Kubernetes device plugin. The whole manifest is
# rendered only when aws_efa_device_plugin_enabled is "true".
# source: https://github.com/aws/eks-charts/blob/master/stable/aws-efa-k8s-device-plugin/templates/daemonset.yaml
3+
apiVersion: apps/v1
4+
kind: DaemonSet
5+
metadata:
6+
name: aws-efa-k8s-device-plugin
7+
namespace: kube-system
8+
labels:
9+
application: kubernetes
10+
component: aws-efa-k8s-device-plugin
11+
spec:
12+
selector:
13+
matchLabels:
14+
# Must stay in sync with the "daemonset" label in the pod template below.
daemonset: aws-efa-k8s-device-plugin
15+
updateStrategy:
16+
type: RollingUpdate
17+
template:
18+
metadata:
19+
labels:
20+
daemonset: aws-efa-k8s-device-plugin
21+
application: kubernetes
22+
component: aws-efa-k8s-device-plugin
23+
annotations:
24+
logging/destination: "{{.Cluster.ConfigItems.log_destination_infra}}"
25+
spec:
26+
# Same name as the ServiceAccount manifest and the privileged-service-accounts
# entry in the admission-control config.
serviceAccountName: aws-efa-k8s-device-plugin
27+
# No key is given and the operator is Exists, so every NoExecute and
# NoSchedule taint is tolerated.
tolerations:
28+
- operator: Exists
29+
effect: NoExecute
30+
- operator: Exists
31+
effect: NoSchedule
32+
# Mark this pod as a critical add-on; when enabled, the critical add-on
33+
# scheduler reserves resources for critical add-on pods so that they can
34+
# be rescheduled after a failure.
35+
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
36+
priorityClassName: "system-node-critical"
37+
# Hard scheduling requirement: pods are placed only on nodes whose
# node.kubernetes.io/instance-type label equals one of the values below.
# NOTE(review): presumably the list of EFA-capable instance types - verify
# against the upstream chart / AWS documentation when updating it.
affinity:
38+
nodeAffinity:
39+
requiredDuringSchedulingIgnoredDuringExecution:
40+
nodeSelectorTerms:
41+
- matchExpressions:
42+
- key: node.kubernetes.io/instance-type
43+
operator: In
44+
values:
45+
- m5dn.24xlarge
46+
- m5n.24xlarge
47+
- m5zn.12xlarge
48+
- m6a.48xlarge
49+
- m6i.32xlarge
50+
- m6id.32xlarge
51+
- m6idn.32xlarge
52+
- m6in.32xlarge
53+
- m7a.48xlarge
54+
- m7g.16xlarge
55+
- m7gd.16xlarge
56+
- m7i.48xlarge
57+
- c5n.9xlarge
58+
- c5n.18xlarge
59+
- c6a.48xlarge
60+
- c6gn.16xlarge
61+
- c6i.32xlarge
62+
- c6id.32xlarge
63+
- c6in.32xlarge
64+
- c7a.48xlarge
65+
- c7g.16xlarge
66+
- c7gd.16xlarge
67+
- c7gn.16xlarge
68+
- c7i.48xlarge
69+
- r5dn.24xlarge
70+
- r5n.24xlarge
71+
- r6a.48xlarge
72+
- r6i.32xlarge
73+
- r6idn.32xlarge
74+
- r6in.32xlarge
75+
- r6id.32xlarge
76+
- r7a.48xlarge
77+
- r7g.16xlarge
78+
- r7gd.16xlarge
79+
- r7i.48xlarge
80+
- r7iz.32xlarge
81+
- x2idn.32xlarge
82+
- x2iedn.32xlarge
83+
- x2iezn.12xlarge
84+
- i3en.12xlarge
85+
- i3en.24xlarge
86+
- i4g.16xlarge
87+
- i4i.32xlarge
88+
- im4gn.16xlarge
89+
- dl1.24xlarge
90+
- dl2q.24xlarge
91+
- g4dn.8xlarge
92+
- g4dn.12xlarge
93+
- g4dn.16xlarge
94+
- g5.8xlarge
95+
- g5.12xlarge
96+
- g5.16xlarge
97+
- g5.24xlarge
98+
- g5.48xlarge
99+
- g6.8xlarge
100+
- g6.12xlarge
101+
- g6.16xlarge
102+
- g6.24xlarge
103+
- g6.48xlarge
104+
- g6e.8xlarge
105+
- g6e.12xlarge
106+
- g6e.16xlarge
107+
- g6e.24xlarge
108+
- g6e.48xlarge
109+
- gr6.8xlarge
110+
- inf1.24xlarge
111+
- p3dn.24xlarge
112+
- p4d.24xlarge
113+
- p4de.24xlarge
114+
- p5.48xlarge
115+
- p5e.48xlarge
116+
- p5en.48xlarge
117+
- trn1.32xlarge
118+
- trn1n.32xlarge
119+
- trn2.48xlarge
120+
- vt1.24xlarge
121+
- hpc6a.48xlarge
122+
- hpc6id.32xlarge
123+
- hpc7a.12xlarge
124+
- hpc7a.24xlarge
125+
- hpc7a.48xlarge
126+
- hpc7a.96xlarge
127+
- hpc7g.4xlarge
128+
- hpc7g.8xlarge
129+
- hpc7g.16xlarge
130+
hostNetwork: true
131+
containers:
132+
- image: container-registry.zalando.net/teapot/aws-efa-k8s-device-plugin:v0.5.4-main-1
133+
name: aws-efa-k8s-device-plugin
134+
# Privilege escalation is disabled and all capabilities are dropped;
# runAsNonRoot: false means running as root is not rejected.
securityContext:
135+
allowPrivilegeEscalation: false
136+
capabilities:
137+
drop:
138+
- ALL
139+
runAsNonRoot: false
140+
# Requests and limits are filled from the same config items, so they are
# always equal.
resources:
141+
requests:
142+
cpu: "{{ .Cluster.ConfigItems.aws_efa_device_plugin_cpu }}"
143+
memory: "{{ .Cluster.ConfigItems.aws_efa_device_plugin_memory }}"
144+
limits:
145+
cpu: "{{ .Cluster.ConfigItems.aws_efa_device_plugin_cpu }}"
146+
memory: "{{ .Cluster.ConfigItems.aws_efa_device_plugin_memory }}"
147+
# Both mounts expose host directories at the same path inside the container
# (see the hostPath volumes below).
volumeMounts:
148+
- name: device-plugin
149+
mountPath: /var/lib/kubelet/device-plugins
150+
- name: infiniband-volume
151+
mountPath: /dev/infiniband/
152+
volumes:
153+
# Kubelet device-plugin directory on the host.
- name: device-plugin
154+
hostPath:
155+
path: /var/lib/kubelet/device-plugins
156+
# NOTE(review): presumably where the EFA devices show up on the host - confirm.
- name: infiniband-volume
157+
hostPath:
158+
path: /dev/infiniband/
159+
{{ end }}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{{ if eq .Cluster.ConfigItems.aws_efa_device_plugin_enabled "true" }}
2+
# ServiceAccount referenced by serviceAccountName in the
# aws-efa-k8s-device-plugin DaemonSet; rendered only when the plugin is enabled.
apiVersion: v1
3+
kind: ServiceAccount
4+
metadata:
5+
name: aws-efa-k8s-device-plugin
6+
namespace: kube-system
7+
labels:
8+
application: kubernetes
9+
component: aws-efa-k8s-device-plugin
10+
{{ end }}

cluster/manifests/deletions.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,11 @@ post_apply:
339339
- name: kube-janitor
340340
kind: ClusterRoleBinding
341341
{{- end }}
342+
# Cleanup when the EFA device plugin is not enabled (config item is anything
# other than "true"): list its DaemonSet and ServiceAccount in kube-system for deletion.
{{- if ne .Cluster.ConfigItems.aws_efa_device_plugin_enabled "true"}}
343+
- name: aws-efa-k8s-device-plugin
344+
kind: DaemonSet
345+
namespace: kube-system
346+
- name: aws-efa-k8s-device-plugin
347+
kind: ServiceAccount
348+
namespace: kube-system
349+
{{- end}}

0 commit comments

Comments
 (0)