diff --git a/operator/pkg/resources/statefulset_update.go b/operator/pkg/resources/statefulset_update.go index e2670b734..debf3e544 100644 --- a/operator/pkg/resources/statefulset_update.go +++ b/operator/pkg/resources/statefulset_update.go @@ -94,15 +94,23 @@ func (r *StatefulSetResource) runUpdate( // Check if we should run update on this specific STS. log.V(logger.DebugLevel).Info("Checking that we should update") - update, err := r.shouldUpdate(current, modified) + stsChanged, nodePoolRestarting, err := r.shouldUpdate(current, modified) if err != nil { return fmt.Errorf("unable to determine the update procedure: %w", err) } - if !update { + if !stsChanged && !nodePoolRestarting { return nil } + // When a StatefulSet change is detected, mark all Node Pool Pods as requiring a Pod rolling restart + if stsChanged { + log.V(logger.DebugLevel).Info("Setting ClusterUpdate condition on pods") + if err = r.MarkPodsForUpdate(ctx, ClusterUpdateReasonVersion); err != nil { + return fmt.Errorf("unable to mark pods for update: %w", err) + } + } + // At this point, we have seen a diff and want to update the StatefulSet. npStatus := r.getNodePoolStatus() @@ -612,17 +620,19 @@ func (r *StatefulSetResource) updateStatefulSet( return nil } -// shouldUpdate returns true if changes on the CR require update +// shouldUpdate compares the generated appsv1.StatefulSet from the StatefulSetResource.obj +// function (referred to as `modified`) with appsv1.StatefulSet retrieved from +// the Kubernetes API (referred to as `current`). If a difference is detected, the function returns +// `true` as the first value. Certain fields are ignored because they are not relevant to a Pod +// rolling upgrade (e.g. `Status`, `VolumeClaimTemplate`, or `Annotations`). +// +// If no differences are found, the function returns `false` as first value. The second value +// depends on the Node Pool Status, indicating whether rolling restart is still in progress. func (r *StatefulSetResource) shouldUpdate( current, modified *appsv1.StatefulSet, -) (bool, error) { +) (bool, bool, error) { log := r.logger.WithName("shouldUpdate") - npStatus := r.getNodePoolStatus() - - if npStatus.Restarting || r.pandaCluster.Status.Restarting { - return true, nil - } prepareResourceForPatch(current, modified) opts := []patch.CalculateOption{ patch.IgnoreStatusFields(), @@ -632,10 +642,16 @@ func (r *StatefulSetResource) shouldUpdate( } patchResult, err := patch.NewPatchMaker(patch.NewAnnotator(redpandaAnnotatorKey), &patch.K8sStrategicMergePatcher{}, &patch.BaseJSONMergePatcher{}).Calculate(current, modified, opts...) if err != nil || patchResult.IsEmpty() { - return false, err + npStatus := r.getNodePoolStatus() + + if npStatus.Restarting || r.pandaCluster.Status.Restarting { + return false, true, nil + } + + return false, false, err } log.Info("Detected diff", "patchResult", string(patchResult.Patch)) - return true, nil + return true, false, nil } func (r *StatefulSetResource) getRestartingStatus() bool { diff --git a/operator/pkg/resources/statefulset_update_test.go b/operator/pkg/resources/statefulset_update_test.go index db2b7173c..325b60711 100644 --- a/operator/pkg/resources/statefulset_update_test.go +++ b/operator/pkg/resources/statefulset_update_test.go @@ -163,14 +163,36 @@ func TestShouldUpdate_AnnotationChange(t *testing.T) { }, }, } - update, err := ssres.shouldUpdate(sts, stsWithAnnotation) + stsChange, _, err := ssres.shouldUpdate(sts, stsWithAnnotation) require.NoError(t, err) - require.True(t, update) + require.True(t, stsChange) // same statefulset with same annotation - update, err = ssres.shouldUpdate(stsWithAnnotation, stsWithAnnotation) + stsChange, _, err = ssres.shouldUpdate(stsWithAnnotation, stsWithAnnotation) require.NoError(t, err) - require.False(t, update) + require.False(t, stsChange) + + ssres = StatefulSetResource{ + nodePool: vectorizedv1alpha1.NodePoolSpecWithDeleted{ + NodePoolSpec: vectorizedv1alpha1.NodePoolSpec{ + Name: "default", + }, + }, + pandaCluster: &vectorizedv1alpha1.Cluster{ + Status: vectorizedv1alpha1.ClusterStatus{ + Restarting: false, + NodePools: map[string]vectorizedv1alpha1.NodePoolStatus{ + "default": { + Restarting: true, + }, + }, + }, + }, + } + // same statefulset with same annotation, but with node pool status restarting + _, nodePoolRestarting, err := ssres.shouldUpdate(stsWithAnnotation, stsWithAnnotation) + require.NoError(t, err) + require.True(t, nodePoolRestarting) } func TestPutInMaintenanceMode(t *testing.T) { diff --git a/operator/tests/e2e/additional-configuration/00-redpanda-cluster.yaml b/operator/tests/e2e/additional-configuration/00-redpanda-cluster.yaml index e688b98ec..05ef1cf93 100644 --- a/operator/tests/e2e/additional-configuration/00-redpanda-cluster.yaml +++ b/operator/tests/e2e/additional-configuration/00-redpanda-cluster.yaml @@ -4,7 +4,7 @@ metadata: name: additional-configuration spec: image: "redpandadata/redpanda" - version: "v24.2.5" + version: "v25.1.11" replicas: 1 resources: requests: diff --git a/operator/tests/e2e/additional-configuration/01-verify-config.yaml b/operator/tests/e2e/additional-configuration/01-verify-config.yaml index bec96bcc1..d2baa9261 100644 --- a/operator/tests/e2e/additional-configuration/01-verify-config.yaml +++ b/operator/tests/e2e/additional-configuration/01-verify-config.yaml @@ -1,5 +1,5 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep commands: - - command: ./verify-config.sh -r 10 -b v24.2 + - command: ./verify-config.sh -r 10 -b v25.1 namespaced: true diff --git a/operator/tests/e2e/additional-configuration/03-verify-config-changed.yaml b/operator/tests/e2e/additional-configuration/03-verify-config-changed.yaml index 891169df8..da9365e3f 100644 --- a/operator/tests/e2e/additional-configuration/03-verify-config-changed.yaml +++ b/operator/tests/e2e/additional-configuration/03-verify-config-changed.yaml @@ -1,5 +1,5 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep commands: - - command: ./verify-config.sh -r 11 -b v24.2 + - command: ./verify-config.sh -r 11 -b v25.1 namespaced: true diff --git a/operator/tests/e2e/additional-configuration/verify-config-dev.sh b/operator/tests/e2e/additional-configuration/verify-config-dev.sh index e9ade221b..0f6c5be97 100755 --- a/operator/tests/e2e/additional-configuration/verify-config-dev.sh +++ b/operator/tests/e2e/additional-configuration/verify-config-dev.sh @@ -79,6 +79,8 @@ cloud_storage_segment_max_upload_interval_sec: 1800 default_topic_partitions: 3 enable_idempotence: true enable_rack_awareness: true +internal_topic_replication_factor: 3 +kafka_nodelete_topics: [_internal_connectors_configs _internal_connectors_offsets _internal_connectors_status _audit __consumer_offsets _redpanda_e2e_probe _schemas] log_segment_size: 536870912 EOF ) \ No newline at end of file diff --git a/operator/tests/e2e/additional-configuration/verify-config-v25.1.sh b/operator/tests/e2e/additional-configuration/verify-config-v25.1.sh new file mode 100755 index 000000000..e9ade221b --- /dev/null +++ b/operator/tests/e2e/additional-configuration/verify-config-v25.1.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +expected=$( + cat <