Skip to content

Commit de2e474

Browse files
committed
operator v1: store NodePoolSpec in STS annotations
We encountered corner cases where it becomes extremely difficult to synthesize a NodePoolSpec just by looking at the StatefulSet - which is our fallback if a nodePool was removed from the spec. AdditionalCommandlineArguments is hard to reconstruct, because we'd need to pull it out of the args field in the pod spec of the STS, removing all "other default" args - very error-prone. Instead, we now store the NodePoolSpec used to create the STS in the STS as an annotation. This way we can always find the NodePoolSpec to create the (deleted) STS. In addition, we take this chance to remove small special cases for handling deleted nodepools: - Do not set replicas=currentReplicas anymore. It was more of a trick. Instead, we now set replicas=0 for a deleted nodePool, which exactly represents what should happen with it (scale down to zero). - Remove the check for the Deleted bool in the scale-down handler. It prevented replicas=currentReplicas from being accepted as "do nothing" if it's a deleted nodepool. Then, the control flow would proceed and downscaling happens. This was not very explicit, and it was very hard to find out why downscaling even works in deleted NodePools. With the refactor, replicas is 0, and no special case is needed for deletion anymore.
1 parent 881aba8 commit de2e474

File tree

4 files changed

+25
-64
lines changed

4 files changed

+25
-64
lines changed

operator/pkg/labels/labels.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ const (
3636
// PodNodeIDKey is used to store the Redpanda NodeID of this pod.
3737
PodNodeIDKey = "operator.redpanda.com/node-id"
3838

39+
// NodePoolSpecKey is used to store the NodePoolSpec in a StatefulSet's annotations.
40+
// This allows the operator to correctly reconstruct a NodePoolSpec even
41+
// after it was removed from Spec already.
42+
NodePoolSpecKey = "cluster.redpanda.com/node-pool-spec"
43+
3944
nameKeyRedpandaVal = "redpanda"
4045
nameKeyConsoleVal = "redpanda-console"
4146
managedByOperatorVal = "redpanda-operator"

operator/pkg/nodepools/pools.go

Lines changed: 11 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,14 @@ package nodepools
1818

1919
import (
2020
"context"
21+
"encoding/json"
2122
"fmt"
2223
"slices"
2324
"strings"
2425

2526
vectorizedv1alpha1 "github.com/redpanda-data/redpanda-operator/operator/api/vectorized/v1alpha1"
2627
"github.com/redpanda-data/redpanda-operator/operator/pkg/labels"
2728
appsv1 "k8s.io/api/apps/v1"
28-
corev1 "k8s.io/api/core/v1"
29-
"k8s.io/apimachinery/pkg/api/resource"
3029
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3130
"k8s.io/utils/ptr"
3231
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -89,70 +88,20 @@ outer:
8988
continue
9089
}
9190

92-
replicas := sts.Spec.Replicas
93-
if st, ok := cluster.Status.NodePools[npName]; ok {
94-
replicas = &st.CurrentReplicas
95-
}
96-
97-
var redpandaContainer *corev1.Container
98-
for i := range sts.Spec.Template.Spec.Containers {
99-
container := sts.Spec.Template.Spec.Containers[i]
100-
if container.Name == "redpanda" {
101-
redpandaContainer = &container
102-
break
91+
var np vectorizedv1alpha1.NodePoolSpec
92+
if nodePoolSpecJSON, ok := sts.Annotations[labels.NodePoolSpecKey]; ok {
93+
if err := json.Unmarshal([]byte(nodePoolSpecJSON), &np); err != nil {
94+
return nil, fmt.Errorf("failed to synthesize deleted nodePool %s from its annotation %s", npName, labels.NodePoolSpecKey)
10395
}
10496
}
105-
if redpandaContainer == nil {
106-
return nil, fmt.Errorf("redpanda container not defined in STS %s template", sts.Name)
107-
}
108-
109-
var datadirVcCapacity resource.Quantity
110-
var datadirVcStorageClassName string
11197

112-
var cacheVcExists bool
113-
var cacheVcCapacity resource.Quantity
114-
var cacheVcStorageClassName string
115-
116-
for i := range sts.Spec.VolumeClaimTemplates {
117-
vct := sts.Spec.VolumeClaimTemplates[i]
118-
if vct.Name == "datadir" {
119-
datadirVcCapacity = vct.Spec.Resources.Requests[corev1.ResourceStorage]
120-
if vct.Spec.StorageClassName != nil {
121-
datadirVcStorageClassName = ptr.Deref(vct.Spec.StorageClassName, "")
122-
}
123-
}
124-
if vct.Name == "shadow-index-cache" {
125-
cacheVcExists = true
126-
cacheVcCapacity = vct.Spec.Resources.Requests[corev1.ResourceStorage]
127-
if vct.Spec.StorageClassName != nil {
128-
cacheVcStorageClassName = ptr.Deref(vct.Spec.StorageClassName, "")
129-
}
130-
}
131-
}
98+
// Desired replicas for deleted NodePools is always zero.
99+
np.Replicas = ptr.To(int32(0))
132100

133-
np := vectorizedv1alpha1.NodePoolSpecWithDeleted{
134-
NodePoolSpec: vectorizedv1alpha1.NodePoolSpec{
135-
Name: npName,
136-
Replicas: replicas,
137-
Resources: vectorizedv1alpha1.RedpandaResourceRequirements{
138-
ResourceRequirements: redpandaContainer.Resources,
139-
},
140-
Tolerations: sts.Spec.Template.Spec.Tolerations,
141-
NodeSelector: sts.Spec.Template.Spec.NodeSelector,
142-
Storage: vectorizedv1alpha1.StorageSpec{
143-
Capacity: datadirVcCapacity,
144-
StorageClassName: datadirVcStorageClassName,
145-
},
146-
},
147-
Deleted: true,
148-
}
149-
if cacheVcExists {
150-
np.CloudCacheStorage = vectorizedv1alpha1.StorageSpec{
151-
Capacity: cacheVcCapacity,
152-
StorageClassName: cacheVcStorageClassName,
153-
}
154-
}
155-
nodePoolsWithDeleted = append(nodePoolsWithDeleted, &np)
101+
nodePoolsWithDeleted = append(nodePoolsWithDeleted, &vectorizedv1alpha1.NodePoolSpecWithDeleted{
102+
NodePoolSpec: np,
103+
Deleted: true,
104+
})
156105
}
157106
return nodePoolsWithDeleted, nil
158107
}

operator/pkg/resources/statefulset.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,11 +428,19 @@ func (r *StatefulSetResource) obj(
428428
nodePoolSelector = clusterLabels.AsAPISelector()
429429
}
430430

431+
nodePoolSpecJSON, err := json.Marshal(r.nodePool.NodePoolSpec)
432+
if err != nil {
433+
return nil, fmt.Errorf("failed to marshal NodePoolSpec as JSON: %w", err)
434+
}
435+
431436
ss := &appsv1.StatefulSet{
432437
ObjectMeta: metav1.ObjectMeta{
433438
Namespace: r.Key().Namespace,
434439
Name: r.Key().Name,
435440
Labels: nodePoolLabels,
441+
Annotations: map[string]string{
442+
labels.NodePoolSpecKey: string(nodePoolSpecJSON),
443+
},
436444
},
437445
TypeMeta: metav1.TypeMeta{
438446
Kind: "StatefulSet",

operator/pkg/resources/statefulset_scale.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,8 @@ func (r *StatefulSetResource) handleScaling(ctx context.Context) error {
102102
return r.setCurrentReplicas(ctx, *r.nodePool.Replicas, r.nodePool.Name, r.logger)
103103
}
104104

105-
if ptr.Deref(r.nodePool.Replicas, 0) == npCurrentReplicas && !r.nodePool.Deleted {
105+
if ptr.Deref(r.nodePool.Replicas, 0) == npCurrentReplicas {
106106
log.V(logger.DebugLevel).Info("No scaling changes required for this nodepool", "replicas", *r.nodePool.Replicas, "spec replicas", *r.LastObservedState.Spec.Replicas) // No changes to replicas, we do nothing here
107-
108107
return nil
109108
}
110109

0 commit comments

Comments
 (0)