Skip to content

Commit 0de9712

Browse files
Merge pull request #2033 from jsafrane/progressing-set-version-annotation
OCPBUGS-62634: Fix CSI node DaemonSet progressing
2 parents 69ca907 + 1efdba0 commit 0de9712

File tree

2 files changed

+137
-10
lines changed

2 files changed

+137
-10
lines changed

pkg/operator/csi/csidrivernodeservicecontroller/csi_driver_node_service_controller.go

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ const (
3434
nodeDriverRegistrarImageEnvName = "NODE_DRIVER_REGISTRAR_IMAGE"
3535
livenessProbeImageEnvName = "LIVENESS_PROBE_IMAGE"
3636
kubeRBACProxyImageEnvName = "KUBE_RBAC_PROXY_IMAGE"
37+
stableGenerationAnnotationName = "storage.openshift.io/stable-generation"
3738
)
3839

3940
// DaemonSetHookFunc is a hook function to modify the DaemonSet.
@@ -181,6 +182,11 @@ func (c *CSIDriverNodeServiceController) syncManaged(ctx context.Context, opSpec
181182
return err
182183
}
183184

185+
daemonSet, err = c.storeLastStableGeneration(ctx, syncContext, daemonSet)
186+
if err != nil {
187+
return err
188+
}
189+
184190
// Create an OperatorStatusApplyConfiguration with generations
185191
status := applyoperatorv1.OperatorStatus().
186192
WithGenerations(&applyoperatorv1.GenerationStatusApplyConfiguration{
@@ -217,7 +223,7 @@ func (c *CSIDriverNodeServiceController) syncManaged(ctx context.Context, opSpec
217223
WithMessage("DaemonSet is not progressing").
218224
WithReason("AsExpected")
219225

220-
if ok, msg := isProgressing(opStatus, daemonSet); ok {
226+
if ok, msg := isProgressing(daemonSet); ok {
221227
progressingCondition = progressingCondition.
222228
WithStatus(opv1.ConditionTrue).
223229
WithMessage(msg).
@@ -253,7 +259,22 @@ func (c *CSIDriverNodeServiceController) getDaemonSet(opSpec *opv1.OperatorSpec)
253259
return required, nil
254260
}
255261

256-
func isProgressing(status *opv1.OperatorStatus, daemonSet *appsv1.DaemonSet) (bool, string) {
262+
func isProgressing(daemonSet *appsv1.DaemonSet) (bool, string) {
263+
// Progressing means "[the component] is actively rolling out new code, propagating config
264+
// changes (e.g, a version change), or otherwise moving from one steady state to another."
265+
// This controller expects that all "config changes" result in increased DaemonSet generation
266+
// (i.e. DaemonSet .spec changes)
267+
// The controller stores the last "stable" DS generation in an annotation.
268+
// Stable means that all DS replicas are available and at the latest version.
269+
// Any subsequent missing replicas must be caused by a node added / removed / rebooted /
270+
// pod manually killed, which then does not result in Progressing=true.
271+
lastStableGeneration := daemonSet.Annotations[stableGenerationAnnotationName]
272+
currentGeneration := strconv.FormatInt(daemonSet.Generation, 10)
273+
if lastStableGeneration == currentGeneration {
274+
// The previous reconfiguration has completed in the past.
275+
return false, ""
276+
}
277+
257278
switch {
258279
case daemonSet.Generation != daemonSet.Status.ObservedGeneration:
259280
return true, "Waiting for DaemonSet to act on changes"
@@ -315,3 +336,20 @@ func (c *CSIDriverNodeServiceController) syncDeleting(ctx context.Context, opSpe
315336
// All removed, remove the finalizer as the last step
316337
return v1helpers.RemoveFinalizer(ctx, c.operatorClient, c.instanceName)
317338
}
339+
340+
func (c *CSIDriverNodeServiceController) storeLastStableGeneration(ctx context.Context, syncContext factory.SyncContext, daemonSet *appsv1.DaemonSet) (*appsv1.DaemonSet, error) {
341+
lastStableGeneration := daemonSet.Annotations[stableGenerationAnnotationName]
342+
currentGeneration := strconv.FormatInt(daemonSet.Generation, 10)
343+
if lastStableGeneration == currentGeneration {
344+
return daemonSet, nil
345+
}
346+
347+
if isProgressing, _ := isProgressing(daemonSet); isProgressing {
348+
return daemonSet, nil
349+
}
350+
351+
klog.V(2).Infof("DaemonSet %s/%s generation %d is stable", daemonSet.Namespace, daemonSet.Name, daemonSet.Generation)
352+
daemonSet.Annotations[stableGenerationAnnotationName] = currentGeneration
353+
daemonSet, _, err := resourceapply.ApplyDaemonSet(ctx, c.kubeClient.AppsV1(), syncContext.Recorder(), daemonSet, daemonSet.Generation)
354+
return daemonSet, err
355+
}

pkg/operator/csi/csidrivernodeservicecontroller/csi_driver_node_service_controller_test.go

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package csidrivernodeservicecontroller
33
import (
44
"context"
55
"fmt"
6-
clocktesting "k8s.io/utils/clock/testing"
76
"os"
87
"sort"
98
"strings"
@@ -19,6 +18,8 @@ import (
1918
coreinformers "k8s.io/client-go/informers"
2019
fakecore "k8s.io/client-go/kubernetes/fake"
2120
core "k8s.io/client-go/testing"
21+
"k8s.io/client-go/tools/cache"
22+
clocktesting "k8s.io/utils/clock/testing"
2223

2324
"github.com/google/go-cmp/cmp"
2425
opv1 "github.com/openshift/api/operator/v1"
@@ -100,7 +101,7 @@ func newTestContext(test testCase, t *testing.T) *testContext {
100101
}
101102

102103
// Add global reactors
103-
addGenerationReactor(coreClient)
104+
addGenerationReactor(coreClient, coreInformerFactory.Apps().V1().DaemonSets().Informer().GetStore())
104105

105106
// fakeDriverInstance also fulfils the OperatorClient interface
106107
fakeOperatorClient := v1helpers.NewFakeOperatorClientWithObjectMeta(
@@ -331,8 +332,18 @@ func withDaemonSetTLSConfig() daemonSetModifier {
331332
}
332333
}
333334

335+
func withDaemonSetAnnotation(annotation string, value string) daemonSetModifier {
336+
return func(instance *appsv1.DaemonSet) *appsv1.DaemonSet {
337+
if instance.Annotations == nil {
338+
instance.Annotations = map[string]string{}
339+
}
340+
instance.Annotations[annotation] = value
341+
return instance
342+
}
343+
}
344+
334345
// This reactor is always enabled and bumps DaemonSet generation when they get updated.
335-
func addGenerationReactor(client *fakecore.Clientset) {
346+
func addGenerationReactor(client *fakecore.Clientset, store cache.Store) {
336347
client.PrependReactor("*", "daemonsets", func(action core.Action) (handled bool, ret runtime.Object, err error) {
337348
switch a := action.(type) {
338349
case core.CreateActionImpl:
@@ -343,7 +354,20 @@ func addGenerationReactor(client *fakecore.Clientset) {
343354
case core.UpdateActionImpl:
344355
object := a.GetObject()
345356
ds := object.(*appsv1.DaemonSet)
346-
ds.Generation++
357+
358+
// Bump generation only when the spec changes.
359+
// Explicitly, do not bump it on annotation change - the API server does not do it either.
360+
oldDSObject, exists, err := store.GetByKey(ds.Namespace + "/" + ds.Name)
361+
if err != nil {
362+
return false, nil, err
363+
}
364+
if !exists {
365+
return false, nil, fmt.Errorf("DaemonSet %s not found", cache.MetaObjectToName(ds))
366+
}
367+
oldDS := oldDSObject.(*appsv1.DaemonSet)
368+
if !equality.Semantic.DeepEqual(oldDS.Spec, ds.Spec) {
369+
ds.Generation++
370+
}
347371
return false, ds, nil
348372
}
349373
return false, nil, nil
@@ -426,6 +450,7 @@ func TestSync(t *testing.T) {
426450
},
427451
},
428452
{
453+
name: "no change",
429454
// DaemonSet is fully deployed and its status is synced to CR
430455
manifestFunc: makeFakeManifest,
431456
images: defaultImages(),
@@ -434,15 +459,45 @@ func TestSync(t *testing.T) {
434459
argsLevel2,
435460
defaultImages(),
436461
withDaemonSetGeneration(1, 1),
437-
withDaemonSetStatus(replica1, replica1, replica1, replica0)),
462+
withDaemonSetStatus(replica1, replica1, replica1, replica0),
463+
withDaemonSetAnnotation(stableGenerationAnnotationName, "1")),
438464
driver: makeFakeDriverInstance(withGenerations(1)),
439465
},
440466
expectedObjects: testObjects{
441467
daemonSet: getDaemonSet(
442468
argsLevel2,
443469
defaultImages(),
444470
withDaemonSetGeneration(1, 1),
445-
withDaemonSetStatus(replica1, replica1, replica1, replica0)),
471+
withDaemonSetStatus(replica1, replica1, replica1, replica0),
472+
withDaemonSetAnnotation(stableGenerationAnnotationName, "1")),
473+
driver: makeFakeDriverInstance(
474+
// withStatus(replica1),
475+
withGenerations(1),
476+
withTrueConditions(conditionAvailable),
477+
withFalseConditions(conditionProgressing)),
478+
},
479+
},
480+
{
481+
name: "finished progressing",
482+
// DaemonSet is fully deployed and its status is synced to CR
483+
manifestFunc: makeFakeManifest,
484+
images: defaultImages(),
485+
initialObjects: testObjects{
486+
daemonSet: getDaemonSet(
487+
argsLevel2,
488+
defaultImages(),
489+
withDaemonSetGeneration(1, 1),
490+
withDaemonSetStatus(replica1, replica1, replica1, replica0),
491+
withDaemonSetAnnotation(stableGenerationAnnotationName, "0")),
492+
driver: makeFakeDriverInstance(withGenerations(1)),
493+
},
494+
expectedObjects: testObjects{
495+
daemonSet: getDaemonSet(
496+
argsLevel2,
497+
defaultImages(),
498+
withDaemonSetGeneration(1, 1),
499+
withDaemonSetStatus(replica1, replica1, replica1, replica0),
500+
withDaemonSetAnnotation(stableGenerationAnnotationName, "1")), // The current generation has reached stability
446501
driver: makeFakeDriverInstance(
447502
// withStatus(replica1),
448503
withGenerations(1),
@@ -490,7 +545,8 @@ func TestSync(t *testing.T) {
490545
argsLevel2,
491546
defaultImages(),
492547
withDaemonSetGeneration(1, 1),
493-
withDaemonSetStatus(replica0, replica0, replica1, replica1)), // the DaemonSet is updating 1 pod
548+
withDaemonSetStatus(replica0, replica0, replica1, replica1), // the DaemonSet is updating 1 pod
549+
withDaemonSetAnnotation(stableGenerationAnnotationName, "0")), // and the current generation was not stable yet
494550
driver: makeFakeDriverInstance(
495551
// withStatus(replica1),
496552
withGenerations(1),
@@ -502,7 +558,8 @@ func TestSync(t *testing.T) {
502558
argsLevel2,
503559
defaultImages(),
504560
withDaemonSetGeneration(1, 1),
505-
withDaemonSetStatus(replica0, replica0, replica1, replica1)), // no change to the DaemonSet
561+
withDaemonSetStatus(replica0, replica0, replica1, replica1),
562+
withDaemonSetAnnotation(stableGenerationAnnotationName, "0")), // and the current generation was not stable yet
506563
driver: makeFakeDriverInstance(
507564
// withStatus(replica0),
508565
withGenerations(1),
@@ -658,6 +715,38 @@ func TestSync(t *testing.T) {
658715
withFalseConditions(conditionAvailable)), // Degraded is set later on
659716
},
660717
},
718+
{
719+
// DaemonSet is updating pods
720+
name: "missing pods - not progressing",
721+
manifestFunc: makeFakeManifest,
722+
images: defaultImages(),
723+
initialObjects: testObjects{
724+
daemonSet: getDaemonSet(
725+
argsLevel2,
726+
defaultImages(),
727+
withDaemonSetGeneration(1, 1),
728+
withDaemonSetStatus(replica0, replica0, replica1, replica1), // the DaemonSet is updating 1 pod
729+
withDaemonSetAnnotation(stableGenerationAnnotationName, "1")), // But the current generation was stable in the past
730+
driver: makeFakeDriverInstance(
731+
// withStatus(replica1),
732+
withGenerations(1),
733+
withTrueConditions(conditionAvailable),
734+
withFalseConditions(conditionProgressing)),
735+
},
736+
expectedObjects: testObjects{
737+
daemonSet: getDaemonSet(
738+
argsLevel2,
739+
defaultImages(),
740+
withDaemonSetGeneration(1, 1),
741+
withDaemonSetStatus(replica0, replica0, replica1, replica1),
742+
withDaemonSetAnnotation(stableGenerationAnnotationName, "1")), // no change to the DaemonSet
743+
driver: makeFakeDriverInstance(
744+
// withStatus(replica0),
745+
withGenerations(1), // But the current generation was stable in the past
746+
withTrueConditions(conditionAvailable),
747+
withFalseConditions(conditionProgressing)), // The operator is not Progressing
748+
},
749+
},
661750
}
662751

663752
for _, test := range testCases {

0 commit comments

Comments
 (0)