Skip to content

Commit 8e8d9c3

Browse files
committed
operator: Restart rolling update when Statefulset changes
This PR enhances the detection mechanism for Node Pool rolling upgrades. Previously, when a Node Pool rolling upgrade was detected, all Pods were marked with the `ClusterUpdate` Pod Status Condition, and the `restarting` Cluster Custom Resource Status Node Pool field was marked as `true`. If a modification to the Cluster Custom Resource occurred during an ongoing rolling upgrade (e.g., a change to the Redpanda container tag), it could trigger an update to the StatefulSet resource. In such cases, Pods that had already been restarted within the affected Node Pool were not restarted again, resulting in inconsistent state propagation (e.g., mismatched Redpanda container tags). With this change, updates to the StatefulSet are explicitly distinguished from Node Pool rolling upgrades. When a StatefulSet update is required, all associated Pods are now marked or re-marked with the `ClusterUpdate` Pod Status Condition to ensure consistent rollout of the updated specification.
1 parent c59ff49 commit 8e8d9c3

File tree

2 files changed

+39
-10
lines changed

2 files changed

+39
-10
lines changed

operator/pkg/resources/statefulset_update.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,22 @@ func (r *StatefulSetResource) runUpdate(
9494
// Check if we should run update on this specific STS.
9595

9696
log.V(logger.DebugLevel).Info("Checking that we should update")
97-
update, err := r.shouldUpdate(current, modified)
97+
stsChanged, nodePoolRestarting, err := r.shouldUpdate(current, modified)
9898
if err != nil {
9999
return fmt.Errorf("unable to determine the update procedure: %w", err)
100100
}
101101

102-
if !update {
102+
if !stsChanged && !nodePoolRestarting {
103103
return nil
104104
}
105105

106+
if stsChanged {
107+
log.V(logger.DebugLevel).Info("Setting ClusterUpdate condition on pods")
108+
if err = r.MarkPodsForUpdate(ctx, ClusterUpdateReasonVersion); err != nil {
109+
return fmt.Errorf("unable to mark pods for update: %w", err)
110+
}
111+
}
112+
106113
// At this point, we have seen a diff and want to update the StatefulSet.
107114
npStatus := r.getNodePoolStatus()
108115

@@ -615,13 +622,13 @@ func (r *StatefulSetResource) updateStatefulSet(
615622
// shouldUpdate returns true if changes on the CR require update
616623
func (r *StatefulSetResource) shouldUpdate(
617624
current, modified *appsv1.StatefulSet,
618-
) (bool, error) {
625+
) (bool, bool, error) {
619626
log := r.logger.WithName("shouldUpdate")
620627

621628
npStatus := r.getNodePoolStatus()
622629

623630
if npStatus.Restarting || r.pandaCluster.Status.Restarting {
624-
return true, nil
631+
return false, true, nil
625632
}
626633
prepareResourceForPatch(current, modified)
627634
opts := []patch.CalculateOption{
@@ -632,10 +639,10 @@ func (r *StatefulSetResource) shouldUpdate(
632639
}
633640
patchResult, err := patch.NewPatchMaker(patch.NewAnnotator(redpandaAnnotatorKey), &patch.K8sStrategicMergePatcher{}, &patch.BaseJSONMergePatcher{}).Calculate(current, modified, opts...)
634641
if err != nil || patchResult.IsEmpty() {
635-
return false, err
642+
return false, false, err
636643
}
637644
log.Info("Detected diff", "patchResult", string(patchResult.Patch))
638-
return true, nil
645+
return true, false, nil
639646
}
640647

641648
func (r *StatefulSetResource) getRestartingStatus() bool {

operator/pkg/resources/statefulset_update_test.go

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,36 @@ func TestShouldUpdate_AnnotationChange(t *testing.T) {
163163
},
164164
},
165165
}
166-
update, err := ssres.shouldUpdate(sts, stsWithAnnotation)
166+
stsChange, _, err := ssres.shouldUpdate(sts, stsWithAnnotation)
167167
require.NoError(t, err)
168-
require.True(t, update)
168+
require.True(t, stsChange)
169169

170170
// same statefulset with same annotation
171-
update, err = ssres.shouldUpdate(stsWithAnnotation, stsWithAnnotation)
171+
stsChange, _, err = ssres.shouldUpdate(stsWithAnnotation, stsWithAnnotation)
172172
require.NoError(t, err)
173-
require.False(t, update)
173+
require.False(t, stsChange)
174+
175+
ssres = StatefulSetResource{
176+
nodePool: vectorizedv1alpha1.NodePoolSpecWithDeleted{
177+
NodePoolSpec: vectorizedv1alpha1.NodePoolSpec{
178+
Name: "default",
179+
},
180+
},
181+
pandaCluster: &vectorizedv1alpha1.Cluster{
182+
Status: vectorizedv1alpha1.ClusterStatus{
183+
Restarting: false,
184+
NodePools: map[string]vectorizedv1alpha1.NodePoolStatus{
185+
"default": {
186+
Restarting: true,
187+
},
188+
},
189+
},
190+
},
191+
}
192+
// same statefulset with same annotation, but with node pool status restarting
193+
_, nodePoolRestarting, err := ssres.shouldUpdate(stsWithAnnotation, stsWithAnnotation)
194+
require.NoError(t, err)
195+
require.True(t, nodePoolRestarting)
174196
}
175197

176198
func TestPutInMaintenanceMode(t *testing.T) {

0 commit comments

Comments
 (0)