Skip to content

Commit ca057f0

Browse files
committed
fixup! fix: resolve deadlock when maxSurge>0 rolling update on single-replica LWS
Revert the wantReplicas condition change (<= back to <): the original <= was correct for multi-replica cases (e.g. replicas=4, maxSurge=1: when only 1 replica is still unready we want to start reclaiming the surge slot). The condition change broke existing tests. The bug is fully fixed by the Case 2 change alone (returning burstReplicas directly instead of wantReplicas(lwsReplicas)). Also correct the new integration test expectations to match the actual controller behaviour: after the surge pod (index=1) becomes ready the controller immediately shrinks replicas back to 1 and sets partition=0 so that group-0 starts being replaced.
1 parent 6bd4412 commit ca057f0

File tree

2 files changed

+24
-19
lines changed

2 files changed

+24
-19
lines changed

pkg/controllers/leaderworkerset_controller.go

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -293,10 +293,10 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context,
293293
// unready (not-yet-updated) replicas. It is only called after the initial
294294
// surge expansion has already happened (i.e. stsReplicas == burstReplicas).
295295
wantReplicas := func(unreadyReplicas int32) int32 {
296-
if unreadyReplicas < int32(maxSurge) {
297-
// We have fewer unready replicas than the surge budget, meaning some
298-
// surge replicas have already been replaced. Release one surge slot
299-
// per newly-ready replica so we converge on lwsReplicas.
296+
if unreadyReplicas <= int32(maxSurge) {
297+
// We have at most maxSurge unready replicas, meaning all or most of the
298+
// surge replicas have been replaced. Release one surge slot per
299+
// newly-ready replica so we converge on lwsReplicas.
300300
finalReplicas := lwsReplicas + utils.NonZeroValue(int32(unreadyReplicas)-1)
301301
r.Record.Eventf(lws, nil, corev1.EventTypeNormal, GroupsProgressing, Delete, fmt.Sprintf("deleting surge replica %s-%d", lws.Name, finalReplicas))
302302
return finalReplicas
@@ -310,8 +310,8 @@ func (r *LeaderWorkerSetReconciler) rollingUpdateParameters(ctx context.Context,
310310
// so we must first expand to burstReplicas before the rolling partition
311311
// can start advancing. Calling wantReplicas(lwsReplicas) here was wrong:
312312
// with replicas=1 and maxSurge=1 it satisfied the shrink condition
313-
// (1 <= 1) and immediately returned lwsReplicas, preventing the surge
314-
// replica from ever being created.
313+
// (unreadyReplicas(1) <= maxSurge(1)) and immediately returned lwsReplicas,
314+
// preventing the surge replica from ever being created.
315315
if leaderWorkerSetUpdated {
316316
// Processing scaling up/down first prior to rolling update.
317317
return min(lwsReplicas, stsReplicas), burstReplicas, nil

test/integration/controllers/leaderworkerset_test.go

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2299,9 +2299,9 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
22992299
}),
23002300

23012301
ginkgo.Entry("rolling update with maxSurge=1 and single replica creates surge before rolling", &testCase{
2302-
// Regression test for: with replicas=1 and maxSurge=1 the controller
2303-
// was entering the shrink path of wantReplicas immediately on update
2304-
// (condition was unreadyReplicas<=maxSurge i.e. 1<=1), so it returned
2302+
// Regression test for: with replicas=1 and maxSurge=1, calling
2303+
// wantReplicas(lwsReplicas) in Case 2 satisfied the shrink condition
2304+
// (unreadyReplicas(1) <= maxSurge(1)), so the controller returned
23052305
// replicas=1 and emitted a spurious "deleting surge replica" event
23062306
// without ever creating the surge pod, leaving the update stuck forever.
23072307
makeLeaderWorkerSet: func(nsName string) *wrappers.LeaderWorkerSetWrapper {
@@ -2323,39 +2323,44 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
23232323
},
23242324
{
23252325
// Trigger rolling update by changing worker template.
2326-
// The controller must expand to burstReplicas (2) before
2327-
// advancing the partition -- it must NOT stay at replicas=1.
2326+
// The controller must expand to burstReplicas (2) with partition=1
2327+
// so the surge pod gets the new template. Before the fix, Case 2
2328+
// returned replicas=1 and the STS never grew.
23282329
lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
23292330
testing.UpdateWorkerTemplate(ctx, k8sClient, lws)
23302331
},
23312332
checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
2332-
// replicas must be 2 (1 original + 1 surge) to prove
2333-
// that the surge was actually created.
2333+
// replicas must be 2 (1 original + 1 surge) to prove the surge
2334+
// was actually created. partition=1 means pod-0 (old) is held,
2335+
// pod-1 (surge, new template) is created first.
23342336
testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 2)
23352337
testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing")
23362338
testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress")
2337-
testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0)
2339+
testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 1)
23382340
testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 1, 0)
23392341
},
23402342
},
23412343
{
2342-
// Create the surge leader pod and mark it ready, which allows
2343-
// the rollout to proceed.
2344+
// Create the surge leader pod and mark it ready. Once pod-1 is
2345+
// ready the controller releases the surge (replicas=1, partition=0)
2346+
// and pod-0 starts being replaced with the new template.
23442347
lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
23452348
var leaderSts appsv1.StatefulSet
23462349
testing.GetLeaderStatefulset(ctx, lws, k8sClient, &leaderSts)
23472350
gomega.Expect(testing.CreateLeaderPods(ctx, leaderSts, k8sClient, lws, 1, 2)).To(gomega.Succeed())
23482351
testing.SetPodGroupToReady(ctx, k8sClient, lws.Name+"-1", lws)
23492352
},
23502353
checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
2351-
// Surge replica-1 is ready; replicas should shrink back toward lwsReplicas.
2354+
// Surge released: STS back to replicas=1, partition=0.
2355+
// pod-0 is being replaced (Progressing, not yet Available).
2356+
testing.ExpectValidLeaderStatefulSet(ctx, k8sClient, lws, 1)
2357+
testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 0)
23522358
testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing")
23532359
testing.ExpectLeaderWorkerSetUpgradeInProgress(ctx, k8sClient, lws, "Rolling Upgrade is in progress")
23542360
},
23552361
},
23562362
{
2357-
// Mark group-0 (the original replica) as ready on the new template;
2358-
// the rollout should now complete and replicas return to 1.
2363+
// Mark group-0 ready on the new template; the rollout completes.
23592364
lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
23602365
testing.SetPodGroupToReady(ctx, k8sClient, lws.Name+"-0", lws)
23612366
},

0 commit comments

Comments (0)