diff --git a/apis/v1alpha1/ack-generate-metadata.yaml b/apis/v1alpha1/ack-generate-metadata.yaml index 5602cbc..b6afffa 100755 --- a/apis/v1alpha1/ack-generate-metadata.yaml +++ b/apis/v1alpha1/ack-generate-metadata.yaml @@ -1,8 +1,8 @@ ack_generate_info: - build_date: "2025-07-30T22:50:35Z" - build_hash: b2dc0f44e0b08f041de14c3944a5cc005ba97c8f + build_date: "2025-08-06T18:46:04Z" + build_hash: b4fbf4e427daaef74ed873aac01e4a9ca68fb479 go_version: go1.24.5 - version: v0.50.0 + version: v0.50.0-3-gb4fbf4e api_directory_checksum: 2b5e65a1d5f0a032d51209f925b714aff4b6dc96 api_version: v1alpha1 aws_sdk_go_version: v1.37.0 diff --git a/helm/templates/caches-role-binding.yaml b/helm/templates/caches-role-binding.yaml index 105d3d1..20301d3 100644 --- a/helm/templates/caches-role-binding.yaml +++ b/helm/templates/caches-role-binding.yaml @@ -1,7 +1,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ include "ack-eks-controller.app.fullname" . }}-namespace-caches + name: {{ include "ack-eks-controller.app.fullname" . }}-namespaces-cache labels: app.kubernetes.io/name: {{ include "ack-eks-controller.app.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} @@ -12,7 +12,7 @@ metadata: roleRef: kind: ClusterRole apiGroup: rbac.authorization.k8s.io - name: {{ include "ack-eks-controller.app.fullname" . }}-namespace-caches + name: {{ include "ack-eks-controller.app.fullname" . }}-namespaces-cache subjects: - kind: ServiceAccount name: {{ include "ack-eks-controller.service-account.name" . 
}} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 58150ba..920d9af 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -205,7 +205,7 @@ spec: secretName: {{ .Values.aws.credentials.secretName }} {{- end }} {{- if .Values.deployment.extraVolumes }} - {{ toYaml .Values.deployment.extraVolumes | indent 8 }} + {{- toYaml .Values.deployment.extraVolumes | nindent 8 }} {{- end }} {{- end }} {{- with .Values.deployment.strategy }} diff --git a/pkg/resource/addon/hooks.go b/pkg/resource/addon/hooks.go index 6ebd1e6..d85e42b 100644 --- a/pkg/resource/addon/hooks.go +++ b/pkg/resource/addon/hooks.go @@ -52,8 +52,6 @@ var ( var ( // TerminalStatuses defines the list of statuses that are terminal for an addon TerminalStatuses = []string{ - StatusCreateFailed, - StatusUpdateFailed, StatusDeleteFailed, // Still not sure if we should consider DEGRADED as terminal // StatusDegraded, @@ -101,6 +99,16 @@ func addonHasTerminalStatus(r *resource) bool { return false } +// addonInFailedState reports whether the supplied addon's status is +// StatusCreateFailed or StatusUpdateFailed; a nil status yields false. +func addonInFailedState(r *resource) bool { + if r.ko.Status.Status == nil { + return false + } + cs := *r.ko.Status.Status + return cs == StatusCreateFailed || cs == StatusUpdateFailed +} + // requeueWaitUntilCanModify returns a `ackrequeue.RequeueNeededAfter` struct // explaining the addon cannot be modified until it reaches an active status. 
func requeueWaitUntilCanModify(r *resource) *ackrequeue.RequeueNeededAfter { diff --git a/pkg/resource/addon/sdk.go b/pkg/resource/addon/sdk.go index ca47601..38805d9 100644 --- a/pkg/resource/addon/sdk.go +++ b/pkg/resource/addon/sdk.go @@ -436,7 +436,11 @@ func (rm *resourceManager) sdkUpdate( ackcondition.SetSynced(latest, corev1.ConditionFalse, &msg, nil) return latest, requeueWaitWhileDeleting } - if !addonActive(latest) { + + // Record whether the addon is in a create/update failed state; such addons bypass the not-active handling below so an update can be attempted + inFailedState := addonInFailedState(latest) + + if !addonActive(latest) && !inFailedState { msg := "Addon is in '" + *latest.ko.Status.Status + "' status" ackcondition.SetSynced(latest, corev1.ConditionFalse, &msg, nil) if addonHasTerminalStatus(latest) { @@ -446,6 +450,12 @@ func (rm *resourceManager) sdkUpdate( return latest, requeueWaitUntilCanModify(latest) } + // If addon is in failed state, mark it as not synced while recovery is attempted; the update itself is forced further below + if inFailedState { + msg := "Addon is in '" + *latest.ko.Status.Status + "' status, attempting recovery" + ackcondition.SetSynced(latest, corev1.ConditionFalse, &msg, nil) + } + if delta.DifferentAt("Spec.Tags") { err := syncTags( ctx, rm.sdkapi, rm.metrics, @@ -456,7 +466,9 @@ func (rm *resourceManager) sdkUpdate( return nil, err } } - if !delta.DifferentExcept("Spec.Tags") { + // If addon is in failed state, always attempt an update to recover. + // Otherwise, return early when nothing besides Spec.Tags differs. + if !inFailedState && !delta.DifferentExcept("Spec.Tags") { return desired, nil } input, err := rm.newUpdateRequestPayload(ctx, desired, delta) diff --git a/pkg/resource/nodegroup/sdk.go b/pkg/resource/nodegroup/sdk.go index 7c99718..a3fab53 100644 --- a/pkg/resource/nodegroup/sdk.go +++ b/pkg/resource/nodegroup/sdk.go @@ -608,7 +608,7 @@ func (rm *resourceManager) newCreateRequestPayload( if r.ko.Spec.DiskSize != nil { diskSizeCopy0 := *r.ko.Spec.DiskSize if diskSizeCopy0 > math.MaxInt32 || diskSizeCopy0 < math.MinInt32 { - return nil, 
fmt.Errorf("error: field diskSize is of type int32") + return nil, fmt.Errorf("error: field DiskSize is of type int32") } diskSizeCopy := int32(diskSizeCopy0) res.DiskSize = &diskSizeCopy diff --git a/templates/hooks/addons/sdk_update_pre_build_request.go.tpl b/templates/hooks/addons/sdk_update_pre_build_request.go.tpl index 4d1fa03..c380a29 100644 --- a/templates/hooks/addons/sdk_update_pre_build_request.go.tpl +++ b/templates/hooks/addons/sdk_update_pre_build_request.go.tpl @@ -3,7 +3,11 @@ ackcondition.SetSynced(latest, corev1.ConditionFalse, &msg, nil) return latest, requeueWaitWhileDeleting } - if !addonActive(latest) { + + // Record whether the addon is in a create/update failed state; such addons bypass the not-active handling below so an update can be attempted + inFailedState := addonInFailedState(latest) + + if !addonActive(latest) && !inFailedState { msg := "Addon is in '" + *latest.ko.Status.Status + "' status" ackcondition.SetSynced(latest, corev1.ConditionFalse, &msg, nil) if addonHasTerminalStatus(latest) { @@ -13,16 +17,24 @@ return latest, requeueWaitUntilCanModify(latest) } + // If addon is in failed state, mark it as not synced while recovery is attempted; the update itself is forced further below + if inFailedState { + msg := "Addon is in '" + *latest.ko.Status.Status + "' status, attempting recovery" + ackcondition.SetSynced(latest, corev1.ConditionFalse, &msg, nil) + } + if delta.DifferentAt("Spec.Tags") { err := syncTags( - ctx, rm.sdkapi, rm.metrics, - string(*desired.ko.Status.ACKResourceMetadata.ARN), + ctx, rm.sdkapi, rm.metrics, + string(*desired.ko.Status.ACKResourceMetadata.ARN), aws.ToStringMap(desired.ko.Spec.Tags), aws.ToStringMap(latest.ko.Spec.Tags), ) if err != nil { return nil, err } } - if !delta.DifferentExcept("Spec.Tags"){ - return desired, nil - } \ No newline at end of file + // If addon is in failed state, always attempt an update to recover. + // Otherwise, return early when nothing besides Spec.Tags differs. + if !inFailedState && !delta.DifferentExcept("Spec.Tags") { + return desired, nil + } \ No newline at end of file