Skip to content

Commit f96bb23

Browse files
authored
Merge branch 'dev' into container-registry.zalando.net/teapot/aws-cloud-controller-manager-internal
2 parents 9c1fe01 + 04186b3 commit f96bb23

File tree

81 files changed

+1958
-983
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+1958
-983
lines changed

cluster/cluster.yaml

Lines changed: 258 additions & 52 deletions
Large diffs are not rendered by default.

cluster/config-defaults.yaml

Lines changed: 30 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,6 @@ cluster_autoscaler_max_graceful_termination_sec: "1209600" # 2 weeks
2828
cluster_autoscaler_max_usnchedulable_pods_considered: "1000"
2929

3030
# karpenter settings
31-
# DO NOT SET TO FALSE IF THE CLUSTER HAS KARPENTER POOLS OR NODES. REFER TO TEAPOT DOCS FOR HOW TO ROLLBACK KARPENTER
32-
# https://teapot.docs.zalando.net/howtos/karpenter-operations/
33-
karpenter_pools_enabled: "true"
3431

3532
karpenter_controller_cpu: "25m"
3633
karpenter_controller_memory: "256Mi"
@@ -50,8 +47,15 @@ karpenter_instance_storage_raid0: "true"
5047
# Can be set cluster wide or per node pool
5148
karpenter_in_transit_support_required: "false"
5249

50+
# configure whether we allow t instance families for Karpenter nodes
51+
# t type instances have burstable CPU, which can be undesirable in production
52+
karpenter_instance_family_t_enabled: "false"
53+
54+
# configure whether spot instances should be enabled in Karpenter's capacity-types
55+
karpenter_enable_spot: "true"
56+
5357
# ALB config created by kube-aws-ingress-controller
54-
kube_aws_ingress_controller_ssl_policy: "ELBSecurityPolicy-TLS-1-2-2017-01"
58+
kube_aws_ingress_controller_ssl_policy: "ELBSecurityPolicy-TLS13-1-2-2021-06"
5559
kube_aws_ingress_controller_idle_timeout: "1m"
5660
kube_aws_ingress_controller_deregistration_delay_timeout: "10s"
5761
# allow using NLBs for ingress
@@ -252,14 +256,6 @@ skipper_pod_deletion_cost_controller_resync_interval: "1h"
252256
# polarsignals - only enabled for testing teapot
253257
polarsignals_enabled: "false"
254258

255-
# Emergency Access Service
256-
# Control whether the emergency access service is enabled or not.
257-
{{ if and (eq .Cluster.Environment "production") (eq .Cluster.Provider "zalando-aws") }}
258-
emergency_access_service_enabled: "true"
259-
{{else}}
260-
emergency_access_service_enabled: "false"
261-
{{end}}
262-
263259
# Kube-Metrics-Adapter
264260
## Scheduled scaling metrics: ramp up/down over this period of time
265261
kube_metrics_adapter_default_scaling_window: "10m"
@@ -312,6 +308,12 @@ skipper_serve_method_metric: "false"
312308
# defines if the HTTP response status code is included as a dimension
313309
# of the skipper_serve_host_duration_seconds_bucket metric.
314310
skipper_serve_status_code_metric: "false"
311+
# skipper_combined_response_metrics sets the flag -combined-response-metrics.
312+
# It enables reporting combined response time metrics
313+
skipper_combined_response_metrics: "false"
314+
# skipper_backend_host_metrics sets the flag -backend-host-metrics.
315+
# It enables reporting total serve time metrics for each backend
316+
skipper_backend_host_metrics: "false"
315317

316318
# disabled|provisioned|enabled routegroup validation via skipper webhook
317319
# can be one of disabled|provisioned|enabled
@@ -385,7 +387,7 @@ fabric_gateway_controller_enabled: "true"
385387
fabric_gateway_controller_cpu: "50m"
386388
fabric_gateway_controller_memory: "150Mi"
387389
fabric_gateway_controller_allow_all_filters: "false"
388-
fabric_gateway_controller_snapshots_history_limit: "3"
390+
fabric_gateway_controller_snapshots_history_limit: "1"
389391
fabric_gateway_controller_enable_versioning: "true"
390392
fabric_gateway_controller_ssl_policy: ""
391393
fabric_gateway_controller_log_level: "INFO"
@@ -398,12 +400,6 @@ event_rate_limit_enable: "true"
398400
event_rate_limit_config_qps: "500"
399401
event_rate_limit_config_burst: "1000"
400402

401-
# cadvisor settings
402-
cadvisor_cpu: "150m"
403-
cadvisor_memory: "150Mi"
404-
cadvisor_profiling_enabled: "false"
405-
cadvisor_enabled: "false"
406-
407403
# settings for enabling the kubelet-summary-metrics proxy and prometheus metric
408404
# collection.
409405
kubelet_summary_metrics_enabled: "true"
@@ -780,18 +776,12 @@ tracing_coredns_local_zone_traces_endpoint: ""
780776
# AMI id given the image name and the Image AWS account owner.
781777
#
782778
# [0]: https://github.com/zalando-incubator/cluster-lifecycle-manager/blob/8a9bd1cb2d094038a9e23e646421f8146b48886a/provisioner/template.go#L116
783-
kuberuntu_image_v1_31_aws_amd64: {{ amiID "zalando-ubuntu-jammy-22.04-kubernetes-production-v1.31.6-amd64-master-368" "861068367966" }}
784-
kuberuntu_image_v1_31_aws_arm64: {{ amiID "zalando-ubuntu-jammy-22.04-kubernetes-production-v1.31.6-arm64-master-368" "861068367966" }}
785-
kuberuntu_image_v1_31_eks_amd64: {{ amiID "zalando-ubuntu-jammy-22.04-kubernetes-production-v1.31.7-amd64-master-371" "861068367966" }}
786-
kuberuntu_image_v1_31_eks_arm64: {{ amiID "zalando-ubuntu-jammy-22.04-kubernetes-production-v1.31.7-arm64-master-371" "861068367966" }}
779+
kuberuntu_image_v1_32_new_amd64: {{ amiID "zalando-ubuntu-jammy-22.04-kubernetes-production-v1.32.4-amd64-master-373" "861068367966" }}
780+
kuberuntu_image_v1_32_new_arm64: {{ amiID "zalando-ubuntu-jammy-22.04-kubernetes-production-v1.32.4-arm64-master-373" "861068367966" }}
787781

788782
# This is used to determine which AMI to use for the cluster or individual node
789-
# pools. Possible values are 'aws' or 'eks'
790-
{{if eq .Cluster.Provider "zalando-eks"}}
791-
kuberuntu_ami_version: "eks"
792-
{{else}}
793-
kuberuntu_ami_version: "aws"
794-
{{end}}
783+
# pools. Possible values are 'new' or 'old'
784+
kuberuntu_ami_version: "new"
795785

796786
# Feature toggle for auditing events
797787
audit_pod_events: "true"
@@ -928,16 +918,9 @@ external_dns_zones_cache_duration: "1h"
928918
# resource configuration
929919
external_dns_mem: "4Gi"
930920

931-
# select which cache to use for Cluster DNS: unbound or dnsmasq.
932-
dns_cache: "unbound"
933-
934921
expirimental_dns_unbound_liveness_probe: "true"
935922

936923
# DNS container resources
937-
dns_dnsmasq_cpu: "100m"
938-
dns_dnsmasq_mem: "50Mi"
939-
dns_dnsmasq_sidecar_cpu: "10m"
940-
dns_dnsmasq_sidecar_mem: "45Mi"
941924
dns_unbound_cpu: "100m"
942925
dns_unbound_mem: "50Mi"
943926
dns_unbound_exporter_cpu: "10m"
@@ -1104,6 +1087,9 @@ config_provider_service: "false"
11041087
# enable SizeMemoryBackedVolumes feature flag
11051088
enable_size_memory_backed_volumes: "true"
11061089

1090+
# enable ImageVolume feature flag
1091+
enable_image_volumes: "false"
1092+
11071093
# enable StatefulSetAutoDeletePVC feature flag
11081094
# https://kubernetes.io/blog/2021/12/16/kubernetes-1-23-statefulset-pvc-auto-deletion/
11091095
enable_statefulset_autodelete_pvc: "true"
@@ -1143,7 +1129,7 @@ enable_statefulset_autodelete_pvc: "true"
11431129
# Source for the template function: sgIngressRanges: https://github.com/zalando-incubator/cluster-lifecycle-manager/blob/42695865a251fef58e22ce612d6549e75fa5d103/provisioner/template.go#L336-L417
11441130
open_sg_ingress_ranges: ""
11451131

1146-
# Each subdomain can reach a max of 63 bytes on Route53
1132+
# Each DNS label (subdomain) can be 63 octets or less (https://datatracker.ietf.org/doc/html/rfc1035#section-2.3.4)
11471133
# This custom value sets the subdomain max allowed length, taking into consideration the 6-character 'cname-' prefix added by external-dns (63 - 6 = 57)
11481134
subdomain_max_length: "57"
11491135

@@ -1269,7 +1255,7 @@ wiz_sensor_cpu: "200m"
12691255
wiz_sensor_memory: "300Mi"
12701256
wiz_connector_cpu: "50m"
12711257
wiz_connector_memory: "150Mi"
1272-
wiz_priority: "false"
1258+
wiz_priority: "true"
12731259
# Please note: when this is set to true, it allows the use of the node selector feature
12741260
# to deploy the sensor and connector on specific nodes, by manually setting the node selector label on the nodes.
12751261
# This is useful when you want to deploy the sensor and connector on specific nodes.
@@ -1284,6 +1270,8 @@ eks_zalando_iam_aws_proxy_hpa_max_replicas: "10"
12841270
eks_zalando_iam_aws_proxy_hpa_cpu_target: "80"
12851271
eks_zalando_iam_aws_proxy_hpa_memory_target: "80"
12861272
eks_okta_identity_provider: "true"
1273+
eks_legacy_cluster_local_id: "kube-1"
1274+
eks_oidc_issuer_url: "https://"
12871275
eks_fis_support_enabled: "false"
12881276
eks_fis_namespaces: "default"
12891277

@@ -1297,3 +1285,7 @@ aws_vpc_cni_custom_networking: "false"
12971285
aws_vpc_cni_enable_network_policy: "false"
12981286
# specify the network policy enforcement mode.
12991287
aws_vpc_cni_network_policy_enforcing_mode: "standard"
1288+
1289+
# aws-load-balancer-controller resource settings
1290+
aws_load_balancer_controller_cpu: "100m"
1291+
aws_load_balancer_controller_mem_max: "4Gi"

cluster/manifests/01-coredns-local/daemonset-coredns.yaml

Lines changed: 1 addition & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ spec:
4242
cpu: 1m
4343
memory: 50Mi
4444
containers:
45-
{{ if eq .Cluster.ConfigItems.dns_cache "unbound" }}
4645
- name: unbound
4746
{{- if eq .Cluster.Provider "zalando-eks" }}
4847
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/unbound:1.22.0-master-10
@@ -119,98 +118,9 @@ spec:
119118
- mountPath: /run/unbound
120119
name: unbound-socket
121120
readOnly: false
122-
{{ end }}
123-
{{ if eq .Cluster.ConfigItems.dns_cache "dnsmasq" }}
124-
- name: dnsmasq
125-
{{- if eq .Cluster.Provider "zalando-eks" }}
126-
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/k8s-dns-dnsmasq-nanny:1.17.4-master-15
127-
{{- else }}
128-
image: container-registry.zalando.net/teapot/k8s-dns-dnsmasq-nanny:1.17.4-master-15
129-
{{- end }}
130-
securityContext:
131-
privileged: true
132-
livenessProbe:
133-
httpGet:
134-
path: /healthcheck/dnsmasq
135-
port: 9054
136-
scheme: HTTP
137-
initialDelaySeconds: 60
138-
timeoutSeconds: 5
139-
successThreshold: 1
140-
failureThreshold: 5
141-
args:
142-
- -v=2
143-
- -logtostderr
144-
- -configDir=/etc/k8s/dns/dnsmasq-nanny
145-
- -restartDnsmasq=true
146-
- --
147-
- --no-resolv
148-
- --keep-in-foreground
149-
- --log-facility=-
150-
- --cache-size=50000
151-
- --dns-forward-max=500
152-
- --neg-ttl=60
153-
# send requests to the last server first, only fallback to the previous ones if it's unreachable
154-
- --strict-order
155-
- --server=10.5.0.11#53 # TODO: fix this for ipv6
156-
- --server={{ if eq .Cluster.ConfigItems.eks_ip_family "ipv4" }}127.0.0.1{{else}}::1{{end}}#9254
157-
ports:
158-
- containerPort: 53
159-
name: dns
160-
protocol: UDP
161-
- containerPort: 53
162-
name: dns-tcp
163-
protocol: TCP
164-
resources:
165-
requests:
166-
ephemeral-storage: 256Mi
167-
limits:
168-
cpu: {{.Cluster.ConfigItems.dns_dnsmasq_cpu}}
169-
memory: {{.Cluster.ConfigItems.dns_dnsmasq_mem}}
170-
lifecycle:
171-
preStop:
172-
sleep:
173-
seconds: 35
174-
- name: sidecar
175-
{{- if eq .Cluster.Provider "zalando-eks" }}
176-
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/k8s-dns-sidecar:1.17.4-master-15
177-
{{- else }}
178-
image: container-registry.zalando.net/teapot/k8s-dns-sidecar:1.17.4-master-15
179-
{{- end }}
180-
securityContext:
181-
privileged: true
182-
livenessProbe:
183-
httpGet:
184-
path: /metrics
185-
port: 9054
186-
scheme: HTTP
187-
initialDelaySeconds: 60
188-
timeoutSeconds: 5
189-
successThreshold: 1
190-
failureThreshold: 5
191-
args:
192-
- --v=2
193-
- --logtostderr
194-
- --probe=dnsmasq,127.0.0.1:9254,ec2.amazonaws.com,5,A
195-
- --prometheus-port=9054
196-
ports:
197-
- containerPort: 9054
198-
name: metrics
199-
protocol: TCP
200-
resources:
201-
requests:
202-
ephemeral-storage: 256Mi
203-
limits:
204-
cpu: {{.Cluster.ConfigItems.dns_dnsmasq_sidecar_cpu}}
205-
memory: {{.Cluster.ConfigItems.dns_dnsmasq_sidecar_mem}}
206-
lifecycle:
207-
preStop:
208-
sleep:
209-
seconds: 35
210-
{{ end }}
211121
- name: coredns
212122
{{- if eq .Cluster.Provider "zalando-eks" }}
213-
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/coredns:1.12.0-master-25
123+
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/coredns:1.12.1-master-26
214124
{{- else }}
215125
image: container-registry.zalando.net/teapot/coredns:1.12.1-master-26
216126
{{- end }}
@@ -299,7 +209,5 @@ spec:
299209
path: Corefile
300210
- key: unbound.conf
301211
path: unbound.conf
302-
{{- if eq .Cluster.ConfigItems.dns_cache "unbound" }}
303212
- name: unbound-socket
304213
emptyDir: {}
305-
{{- end }}

cluster/manifests/02-admission-control/config.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ data:
1313
dns.default.subdomain-max-length: "{{ .Cluster.ConfigItems.subdomain_max_length }}"
1414

1515
generic.prevent-write-operations.enable: "{{ .Cluster.ConfigItems.teapot_admission_controller_prevent_write_operations }}"
16+
{{- if and (eq .Cluster.Provider "zalando-eks") (eq .Cluster.ConfigItems.eks_ip_family "ipv6") }}
17+
generic.inject-albc-defaults.enable: "true"
18+
{{- end }}
1619

1720
pod.container-resource-control.min-memory-request: "25Mi"
1821
pod.container-resource-control.default-cpu-request: "{{ .Cluster.ConfigItems.teapot_admission_controller_default_cpu_request }}"
@@ -29,7 +32,7 @@ data:
2932
pod.service-account-iam.enable: "true"
3033
pod.service-account-iam.base-aws-account-id: "{{ accountID .Cluster.InfrastructureAccount }}"
3134
{{- if eq .Cluster.ConfigItems.teapot_admission_controller_inject_aws_waiter "true" }}
32-
pod.aws-waiter.image: "926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/automata/aws-credentials-waiter:master-257"
35+
pod.aws-waiter.image: "926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/automata/aws-credentials-waiter:master-266"
3336
{{- end }}
3437
pod.env-inject.enable: "{{ .Cluster.ConfigItems.teapot_admission_controller_inject_environment_variables }}"
3538
pod.env-inject.variable._PLATFORM_ACCOUNT: "{{ .Cluster.Alias }}"

cluster/manifests/02-admission-control/deployment.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ spec:
3333
priorityClassName: system-cluster-critical
3434
containers:
3535
- name: admission-controller
36-
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/admission-controller:master-252
36+
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/admission-controller:master-260
3737
lifecycle:
3838
preStop:
39-
exec:
40-
command: ["/bin/sh", "-c", "sleep 60"]
39+
sleep:
40+
seconds: 20
4141
readinessProbe:
4242
httpGet:
4343
scheme: HTTPS

cluster/manifests/02-admission-control/teapot.yaml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,48 @@ webhooks:
475475
apiGroups: [""]
476476
apiVersions: ["v1"]
477477
resources: ["services"]
478+
479+
- name: ingress-admitter.teapot.zalan.do
480+
clientConfig:
481+
{{- if eq .Cluster.Provider "zalando-eks"}}
482+
service:
483+
name: "admission-controller"
484+
namespace: "kube-system"
485+
path: "/ingress"
486+
{{- else }}
487+
url: "https://localhost:8085/ingress"
488+
{{- end }}
489+
caBundle: "{{ .Cluster.ConfigItems.ca_cert_decompressed }}"
490+
admissionReviewVersions: ["v1beta1"]
491+
failurePolicy: Fail
492+
sideEffects: "NoneOnDryRun"
493+
matchPolicy: Equivalent
494+
rules:
495+
- operations: [ "CREATE", "UPDATE" ]
496+
apiGroups: ["networking.k8s.io"]
497+
apiVersions: ["v1"]
498+
resources: ["ingresses"]
499+
- name: routegroup-admitter.teapot.zalan.do
500+
clientConfig:
501+
{{- if eq .Cluster.Provider "zalando-eks"}}
502+
service:
503+
name: "admission-controller"
504+
namespace: "kube-system"
505+
path: "/routegroup"
506+
{{- else }}
507+
url: "https://localhost:8085/routegroup"
508+
{{- end }}
509+
caBundle: "{{ .Cluster.ConfigItems.ca_cert_decompressed }}"
510+
admissionReviewVersions: ["v1beta1"]
511+
failurePolicy: Fail
512+
sideEffects: "NoneOnDryRun"
513+
matchPolicy: Equivalent
514+
rules:
515+
- operations: [ "CREATE", "UPDATE" ]
516+
apiGroups: ["zalando.org"]
517+
apiVersions: ["v1"]
518+
resources: ["routegroups"]
519+
478520
{{- if eq .Cluster.ConfigItems.teapot_admission_controller_enable_rolebinding_webhook "true" }}
479521
- name: rolebinding-admitter.teapot.zalan.do
480522
{{- if eq .Cluster.Provider "zalando-eks"}}

cluster/manifests/02-skipper-validation-webhook/deployment.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,16 @@ spec:
3232
priorityClassName: system-cluster-critical
3333
containers:
3434
- name: skipper-admission-webhook
35-
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/skipper:v0.22.6
35+
image: 926694233939.dkr.ecr.eu-central-1.amazonaws.com/production_namespace/teapot/skipper:v0.22.23
3636
args:
3737
- webhook
3838
- --address=:9085
3939
- --tls-cert-file=/etc/tls-certs/skipper-validation-webhook.pem
4040
- --tls-key-file=/etc/tls-certs/skipper-validation-webhook-key.pem
4141
lifecycle:
4242
preStop:
43-
exec:
44-
command: ["/bin/sh", "-c", " sleep 60"]
43+
sleep:
44+
seconds: 20
4545
readinessProbe:
4646
httpGet:
4747
scheme: HTTPS

cluster/manifests/02-vertical-pod-autoscaler/admission-controller-deployment.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ spec:
2626
containers:
2727
- name: admission-controller
2828
{{if eq .Cluster.ConfigItems.vertical_pod_autoscaler_version "current"}}
29-
image: container-registry.zalando.net/teapot/vpa-admission-controller:v1.3.0-main-8-custom
29+
image: container-registry.zalando.net/teapot/vpa-admission-controller:v1.3.1-main-10-custom
3030
{{else if eq .Cluster.ConfigItems.vertical_pod_autoscaler_version "legacy"}}
31-
image: container-registry.zalando.net/teapot/vpa-admission-controller:v1.2.1-main-6-custom
31+
image: container-registry.zalando.net/teapot/vpa-admission-controller:v1.3.0-main-8-custom
3232
{{end}}
3333
command:
3434
- /admission-controller

0 commit comments

Comments
 (0)