From 39c77c48c39664398c49062c8795186a397dfca5 Mon Sep 17 00:00:00 2001 From: Marco Ebert Date: Thu, 16 Jan 2025 08:08:21 +0100 Subject: [PATCH 1/4] Chart: Reduce default etcd volume size to 50 GB. (#994) --- CHANGELOG.md | 4 ++++ helm/cluster-aws/README.md | 2 +- helm/cluster-aws/values.schema.json | 2 +- helm/cluster-aws/values.yaml | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e52b4bbae..6c318e78f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Chart: Reduce default etcd volume size to 50 GB. + ## [1.3.5] - 2024-12-12 ### Added diff --git a/helm/cluster-aws/README.md b/helm/cluster-aws/README.md index 0a9796bb5..cb9e46c0b 100644 --- a/helm/cluster-aws/README.md +++ b/helm/cluster-aws/README.md @@ -334,7 +334,7 @@ Properties within the `.global.controlPlane` object | `global.controlPlane.apiExtraCertSANs[*]` | **cert SAN**|**Type:** `string`
| | `global.controlPlane.apiMode` | **API mode** - Whether the Kubernetes API server load balancer should be reachable from the internet (public) or internal only (private).|**Type:** `string`
**Default:** `"public"`| | `global.controlPlane.apiServerPort` | **API server port** - The API server Load Balancer port. This option sets the Spec.ClusterNetwork.APIServerPort field on the Cluster CR. In CAPI this field isn't used currently. It is instead used in providers. In CAPA this sets only the public facing port of the Load Balancer. In CAPZ both the public facing and the destination port are set to this value. CAPV and CAPVCD do not use it.|**Type:** `integer`
**Default:** `443`| -| `global.controlPlane.etcdVolumeSizeGB` | **Etcd volume size (GB)**|**Type:** `integer`
**Default:** `100`| +| `global.controlPlane.etcdVolumeSizeGB` | **Etcd volume size (GB)**|**Type:** `integer`
**Default:** `50`| | `global.controlPlane.instanceType` | **EC2 instance type**|**Type:** `string`
**Default:** `"r6i.xlarge"`| | `global.controlPlane.libVolumeSizeGB` | **Lib volume size (GB)** - Size of the volume mounted at `/var/lib` on the control plane nodes. This disk is shared between kubelet folder `/var/lib/kubelet` and containerd folder `/var/lib/containerd`.|**Type:** `integer`
**Default:** `40`| | `global.controlPlane.loadBalancerIngressAllowCidrBlocks` | **Load balancer allow list** - IPv4 address ranges that are allowed to connect to the control plane load balancer, in CIDR notation. When setting this field, remember to add the Management cluster Nat Gateway IPs provided by Giant Swarm so that the cluster can still be managed. These Nat Gateway IPs can be found in the Management Cluster AWSCluster '.status.networkStatus.natGatewaysIPs' field.|**Type:** `array`
| diff --git a/helm/cluster-aws/values.schema.json b/helm/cluster-aws/values.schema.json index 8393bfdaa..53de52e75 100644 --- a/helm/cluster-aws/values.schema.json +++ b/helm/cluster-aws/values.schema.json @@ -1498,7 +1498,7 @@ "etcdVolumeSizeGB": { "type": "integer", "title": "Etcd volume size (GB)", - "default": 100 + "default": 50 }, "instanceType": { "type": "string", diff --git a/helm/cluster-aws/values.yaml b/helm/cluster-aws/values.yaml index 2450eb7ce..8a1d46c0f 100644 --- a/helm/cluster-aws/values.yaml +++ b/helm/cluster-aws/values.yaml @@ -358,7 +358,7 @@ global: controlPlane: apiMode: public apiServerPort: 443 - etcdVolumeSizeGB: 100 + etcdVolumeSizeGB: 50 instanceType: r6i.xlarge libVolumeSizeGB: 40 logVolumeSizeGB: 15 From 9decee5de7e8e373710447dad3cda3a610098b7e Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Thu, 16 Jan 2025 21:09:32 +0100 Subject: [PATCH 2/4] Explicitly set Ignition user data storage type to S3 bucket objects for machine pools (#981) --- CHANGELOG.md | 1 + helm/cluster-aws/templates/_machine_pools.tpl | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c318e78f..884b13a93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Chart: Reduce default etcd volume size to 50 GB. 
+- Explicitly set Ignition user data storage type to S3 bucket objects for machine pools ## [1.3.5] - 2024-12-12 diff --git a/helm/cluster-aws/templates/_machine_pools.tpl b/helm/cluster-aws/templates/_machine_pools.tpl index c0b8a58cb..286eab432 100644 --- a/helm/cluster-aws/templates/_machine_pools.tpl +++ b/helm/cluster-aws/templates/_machine_pools.tpl @@ -86,6 +86,7 @@ spec: instanceWarmup: {{ $value.instanceWarmup | default 600 }} minHealthyPercentage: {{ $value.minHealthyPercentage | default 90 }} ignition: + storageType: ClusterObjectStore # store user data in S3 bucket version: "3.4" lifecycleHooks: - defaultResult: CONTINUE From 28fa2e6fc3c54a793c456374fddda03b70f56be5 Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Thu, 23 Jan 2025 13:25:32 +0100 Subject: [PATCH 3/4] Use reduced IAM permissions on worker nodes instance profile (#991) --- CHANGELOG.md | 1 + helm/cluster-aws/README.md | 1 + helm/cluster-aws/templates/_machine_pools.tpl | 6 ++++++ helm/cluster-aws/values.schema.json | 6 ++++++ helm/cluster-aws/values.yaml | 1 + 5 files changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 884b13a93..2ec64b5c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Chart: Reduce default etcd volume size to 50 GB. - Explicitly set Ignition user data storage type to S3 bucket objects for machine pools +- Use reduced IAM permissions on worker nodes instance profile. This can be toggled back with `global.providerSpecific.reducedInstanceProfileIamPermissionsForWorkers`. 
## [1.3.5] - 2024-12-12 diff --git a/helm/cluster-aws/README.md b/helm/cluster-aws/README.md index cb9e46c0b..b54bc1595 100644 --- a/helm/cluster-aws/README.md +++ b/helm/cluster-aws/README.md @@ -26,6 +26,7 @@ Properties within the `.global.providerSpecific` object | `global.providerSpecific.instanceMetadataOptions` | **Instance metadata options** - Instance metadata options for the EC2 instances in the cluster.|**Type:** `object`
| | `global.providerSpecific.instanceMetadataOptions.httpTokens` | **HTTP tokens** - The state of token usage for your instance metadata requests. If you set this parameter to `optional`, you can use either IMDSv1 or IMDSv2. If you set this parameter to `required`, you must use a IMDSv2 to access the instance metadata endpoint. Learn more at [What’s new in IMDSv2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html).|**Type:** `string`
**Default:** `"required"`| | `global.providerSpecific.nodePoolAmi` | **Amazon machine image (AMI) for node pools** - If specified, this image will be used to provision EC2 instances for node pools.|**Type:** `string`
| +| `global.providerSpecific.reducedInstanceProfileIamPermissionsForWorkers` | **Use reduced IAM permissions on worker nodes instance profile** - Defaults to true. If something breaks, this can temporarily be disabled in order to bring certain IAM permissions (e.g. EC2) back for the worker nodes' IAM instance profile. Applications must use [IRSA](https://docs.giantswarm.io/tutorials/access-management/iam-roles-for-service-accounts/) to authenticate with the AWS API instead of falling back to the instance profile.|**Type:** `boolean`
**Default:** `true`| | `global.providerSpecific.region` | **Region**|**Type:** `string`
| ### Apps diff --git a/helm/cluster-aws/templates/_machine_pools.tpl b/helm/cluster-aws/templates/_machine_pools.tpl index 286eab432..547a16c54 100644 --- a/helm/cluster-aws/templates/_machine_pools.tpl +++ b/helm/cluster-aws/templates/_machine_pools.tpl @@ -6,6 +6,12 @@ metadata: labels: giantswarm.io/machine-pool: {{ include "resource.default.name" $ }}-{{ $name }} {{- include "labels.common" $ | nindent 4 }} + {{- if (required "global.providerSpecific.reducedInstanceProfileIamPermissionsForWorkers is required" $.Values.global.providerSpecific.reducedInstanceProfileIamPermissionsForWorkers) }} + alpha.aws.giantswarm.io/reduced-instance-permissions-workers: "true" + {{- end }} + {{- if eq (required "global.connectivity.cilium.ipamMode is required" $.Values.global.connectivity.cilium.ipamMode) "eni" }} + alpha.aws.giantswarm.io/ipam-mode: "eni" + {{- end }} app.kubernetes.io/version: {{ $.Chart.Version | quote }} name: {{ include "resource.default.name" $ }}-{{ $name }} namespace: {{ $.Release.Namespace }} diff --git a/helm/cluster-aws/values.schema.json b/helm/cluster-aws/values.schema.json index 53de52e75..81859251b 100644 --- a/helm/cluster-aws/values.schema.json +++ b/helm/cluster-aws/values.schema.json @@ -1745,6 +1745,12 @@ "title": "Amazon machine image (AMI) for node pools", "description": "If specified, this image will be used to provision EC2 instances for node pools." }, + "reducedInstanceProfileIamPermissionsForWorkers": { + "type": "boolean", + "title": "Use reduced IAM permissions on worker nodes instance profile", + "description": "Defaults to true. If something breaks, this can temporarily be disabled in order to bring certain IAM permissions (e.g. EC2) back for the worker nodes' IAM instance profile. 
Applications must use [IRSA](https://docs.giantswarm.io/tutorials/access-management/iam-roles-for-service-accounts/) to authenticate with the AWS API instead of falling back to the instance profile.", + "default": true + }, "region": { "type": "string", "title": "Region" diff --git a/helm/cluster-aws/values.yaml b/helm/cluster-aws/values.yaml index 8a1d46c0f..b6f654d7c 100644 --- a/helm/cluster-aws/values.yaml +++ b/helm/cluster-aws/values.yaml @@ -380,6 +380,7 @@ global: flatcarAwsAccount: "706635527432" instanceMetadataOptions: httpTokens: required + reducedInstanceProfileIamPermissionsForWorkers: true release: {} internal: {} kubectlImage: From 29c29d6ec3aa1859114eef55e678fc884a61247e Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Thu, 23 Jan 2025 14:57:24 +0100 Subject: [PATCH 4/4] Explicitly set aws-node-termination-handler queue region so crash-loops are avoided, allowing faster startup (#977) --- CHANGELOG.md | 4 ++++ helm/cluster-aws/templates/aws-nth-app.yaml | 2 ++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ec64b5c9..6596ec96d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Explicitly set Ignition user data storage type to S3 bucket objects for machine pools - Use reduced IAM permissions on worker nodes instance profile. This can be toggled back with `global.providerSpecific.reducedInstanceProfileIamPermissionsForWorkers`. 
+### Fixed + +- Explicitly set aws-node-termination-handler queue region so crash-loops are avoided, allowing faster startup. + ## [1.3.5] - 2024-12-12 ### Added diff --git a/helm/cluster-aws/templates/aws-nth-app.yaml b/helm/cluster-aws/templates/aws-nth-app.yaml index 71ba1848d..6c40b5c35 100644 --- a/helm/cluster-aws/templates/aws-nth-app.yaml +++ b/helm/cluster-aws/templates/aws-nth-app.yaml @@ -3,6 +3,8 @@ {{- define "defaultAwsNodeTerminationHandlerHelmValues" }} awsNodeTerminationHandler: values: + awsRegion: {{ include "aws-region" $ | quote }} + image: registry: {{ include "awsContainerImageRegistry" $ }}