Skip to content

Commit 90710e2

Browse files
authored
v1: fix upgrade of older clusters (#815)
The sts initContainer code used `okToPatch` which attempted to avoid a superfluous restart of the sts the first time it was patched for the new bootstrap mechanism. Since then, the same CEL-based patching has been included in both node configuration and bootstrap, and is thus required on any sts change (that is: the initContainer must not fail), so we include the required env vars unconditionally here.
1 parent 08eeeec commit 90710e2

File tree

4 files changed

+39
-67
lines changed

4 files changed

+39
-67
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
project: operator
2+
kind: Fixed
3+
body: |-
4+
The operator now unconditionally produces statefulsets that have environment variables available to the initContainer that are used for CEL-based config patching.
5+
6+
Previously it attempted to leave existing sts resources unpatched if it seemed like they had already been bootstrapped. With the adoption of CEL patching for node configuration, that left sts pods unable to restart.
7+
time: 2025-05-06T11:13:03.863535+01:00
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
project: operator
2+
kind: Fixed
3+
body: |-
4+
The operator now unconditionally produces an environment for the initContainer that supports CEL-based patching.
5+
6+
This is required to ensure that a pre-existing sts can roll over to new configuration correctly.
7+
time: 2025-05-06T11:14:14.360346+01:00

operator/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ operator helm chart. The same ports will continue to serve metrics using kubebui
6767
if correct FullNameOverride is not provided and handled the same way for both
6868
client creation and render function.
6969
* The Redpanda license was not set by operator. Now it will be set in the first reconciliation. After initial setup the consequent license re-set will be reconciled after client-go cache resync timeout (default 10h).
70+
* The operator now unconditionally produces statefulsets that have environment variables available to the initContainer that are used for CEL-based config patching.
71+
72+
Previously it attempted to leave existing sts resources unpatched if it seemed like they had already been bootstrapped. With the adoption of CEL patching for node configuration, that left sts pods unable to restart.
73+
* The operator now unconditionally produces an environment for the initContainer that supports CEL-based patching.
74+
75+
This is required to ensure that a pre-existing sts can roll over to new configuration correctly.
7076

7177
## [v25.1.1-beta2](https://github.com/redpanda-data/redpanda-operator/releases/tag/operator%2Fv25.1.1-beta2) - 2025-04-24
7278
### Added

operator/pkg/resources/statefulset.go

Lines changed: 19 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -723,37 +723,27 @@ func (r *StatefulSetResource) obj(
723723
}
724724
}
725725

726-
okToPatch, err := r.canOverwriteBootstrapForAnyPreexistingResource(ctx, ss)
726+
// Bootstrap configuration and node configuration uses templating.
727+
// If the operator encounters a STS that does not yet have the additional env
728+
// variables it requires defined, then this may cause an STS roll since the
729+
// initContainer definition will change to accommodate the new env vars.
730+
// This roll should happen only once.
731+
ss.Spec.Template.Spec.InitContainers[0].Env = append(ss.Spec.Template.Spec.InitContainers[0].Env,
732+
corev1.EnvVar{
733+
Name: bootstrapTemplateEnvVar,
734+
Value: filepath.Join(configSourceDir, bootstrapTemplateFile),
735+
},
736+
corev1.EnvVar{
737+
Name: bootstrapDestinationEnvVar,
738+
Value: filepath.Join(configDestinationDir, bootstrapConfigFile),
739+
},
740+
)
741+
// If the bootstrap template needs additional env vars, inject them here
742+
additionalEnv, err := r.cfg.AdditionalInitEnvVars()
727743
if err != nil {
728-
return nil, err
729-
}
730-
if okToPatch {
731-
// Bootstrap configuration is now templated; we don't need to mount the original here
732-
ss.Spec.Template.Spec.InitContainers[0].Env = append(ss.Spec.Template.Spec.InitContainers[0].Env,
733-
corev1.EnvVar{
734-
Name: bootstrapTemplateEnvVar,
735-
Value: filepath.Join(configSourceDir, bootstrapTemplateFile),
736-
},
737-
corev1.EnvVar{
738-
Name: bootstrapDestinationEnvVar,
739-
Value: filepath.Join(configDestinationDir, bootstrapConfigFile),
740-
},
741-
)
742-
// If the bootstrap template needs additional env vars, inject them here
743-
additionalEnv, err := r.cfg.AdditionalInitEnvVars()
744-
if err != nil {
745-
return nil, fmt.Errorf("cannot retrieve the configuration to extract environment variables: %w", err)
746-
}
747-
ss.Spec.Template.Spec.InitContainers[0].Env = append(ss.Spec.Template.Spec.InitContainers[0].Env, additionalEnv...)
748-
} else {
749-
// TODO: drop this. It'll trigger a restart when the operator moves, but as per discussions that's considered acceptable.
750-
// Keep the old .bootstrap.yaml in place.
751-
ss.Spec.Template.Spec.Containers[0].VolumeMounts = append(ss.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
752-
Name: "configmap-dir",
753-
MountPath: path.Join(configDestinationDir, bootstrapConfigFile),
754-
SubPath: bootstrapConfigFile,
755-
})
744+
return nil, fmt.Errorf("cannot retrieve the configuration to extract environment variables: %w", err)
756745
}
746+
ss.Spec.Template.Spec.InitContainers[0].Env = append(ss.Spec.Template.Spec.InitContainers[0].Env, additionalEnv...)
757747

758748
setVolumes(ss, r.pandaCluster, r.nodePool.Storage, r.nodePool.CloudCacheStorage)
759749

@@ -770,44 +760,6 @@ func (r *StatefulSetResource) obj(
770760
return ss, nil
771761
}
772762

773-
// checkForPreexistingResource will check if the resource already exists.
774-
// If it does, we may want to avoid patching it to avoid a superfluous restart.
775-
func (r *StatefulSetResource) canOverwriteBootstrapForAnyPreexistingResource(ctx context.Context, sts *appsv1.StatefulSet) (bool, error) {
776-
// Attempt to fetch the resource
777-
var oldObj appsv1.StatefulSet
778-
err := r.Get(ctx, types.NamespacedName{
779-
Namespace: sts.Namespace,
780-
Name: sts.Name,
781-
}, &oldObj)
782-
if err != nil {
783-
if apierrors.IsNotFound(err) {
784-
// We should create the object fully patched, since it doesn't exist
785-
return true, nil
786-
}
787-
return false, err
788-
}
789-
// Check to see if we have the signature env variable in the initContainer;
790-
// if so we're good to continue.
791-
if len(oldObj.Spec.Template.Spec.InitContainers) > 0 {
792-
for _, ev := range oldObj.Spec.Template.Spec.InitContainers[0].Env {
793-
if ev.Name == bootstrapTemplateEnvVar {
794-
// This already has the hooks; it's fine to patch.
795-
return true, nil
796-
}
797-
}
798-
// Check to see if we are changing any images; if so, we can patch.
799-
if sts.Spec.Template.Spec.InitContainers[0].Image != r.fullConfiguratorImage() {
800-
return true, nil
801-
}
802-
}
803-
if len(sts.Spec.Template.Spec.Containers) > 0 && sts.Spec.Template.Spec.Containers[0].Image != r.pandaCluster.FullImageName() {
804-
return true, nil
805-
}
806-
// Otherwise, we'll leave this untouched for the moment; it's already running,
807-
// and so the bootstrap configuration is irrelevant.
808-
return false, nil
809-
}
810-
811763
func (r *StatefulSetResource) getConfiguratorArgs() []string {
812764
result := []string{"configure"}
813765
if r.configuratorSettings.CloudSecretsEnabled {

0 commit comments

Comments
 (0)