Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
353 changes: 197 additions & 156 deletions internal/controllers/machinedeployment/machinedeployment_rolling.go

Large diffs are not rendered by default.

516 changes: 390 additions & 126 deletions internal/controllers/machinedeployment/machinedeployment_rolling_test.go

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"time"

"github.com/google/go-cmp/cmp"
"github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
Expand All @@ -39,7 +40,7 @@ import (
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
)

type rolloutSequenceTestCase struct {
type rolloutRollingSequenceTestCase struct {
name string
maxSurge int32
maxUnavailable int32
Expand Down Expand Up @@ -107,11 +108,12 @@ type rolloutSequenceTestCase struct {
seed int64
}

func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) {
func Test_rolloutRollingSequences(t *testing.T) {
ctx := context.Background()
ctx = ctrl.LoggerInto(ctx, klog.Background())
klog.SetOutput(ginkgo.GinkgoWriter)

tests := []rolloutSequenceTestCase{
tests := []rolloutRollingSequenceTestCase{
// Regular rollout (no in-place)

{ // scale out by 1
Expand Down Expand Up @@ -161,7 +163,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) {
maxSurge: 3,
maxUnavailable: 1,
currentScope: &rolloutScope{ // Manually providing a scope simulating a MD originally with 6 replicas in the middle of a rollout, with 3 machines already created in the newMS and 3 still on the oldMS, and then MD scaled up to 12.
machineDeployment: createMD("v2", 12, 3, 1),
machineDeployment: createMD("v2", 12, withRolloutStrategy(3, 1)),
machineSets: []*clusterv1.MachineSet{
createMS("ms1", "v1", 3),
createMS("ms2", "v2", 3),
Expand Down Expand Up @@ -189,7 +191,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) {
maxSurge: 3,
maxUnavailable: 1,
currentScope: &rolloutScope{ // Manually providing a scope simulating a MD originally with 12 replicas in the middle of a rollout, with 3 machines already created in the newMS and 9 still on the oldMS, and then MD scaled down to 6.
machineDeployment: createMD("v2", 6, 3, 1),
machineDeployment: createMD("v2", 6, withRolloutStrategy(3, 1)),
machineSets: []*clusterv1.MachineSet{
createMS("ms1", "v1", 9),
createMS("ms2", "v2", 3),
Expand Down Expand Up @@ -223,7 +225,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) {
maxSurge: 3,
maxUnavailable: 1,
currentScope: &rolloutScope{ // Manually providing a scope simulating a MD with 6 replicas in the middle of a rollout, with 3 machines already created in the newMS and 3 still on the oldMS, and then MD spec is changed.
machineDeployment: createMD("v3", 6, 3, 1),
machineDeployment: createMD("v3", 6, withRolloutStrategy(3, 1)),
machineSets: []*clusterv1.MachineSet{
createMS("ms1", "v1", 3),
createMS("ms2", "v2", 3),
Expand All @@ -249,9 +251,8 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) {
}

testWithPredictableReconcileOrder := true
// TODO(in-place): enable tests with random reconcile order as soon as the issues in reconcileOldMachineSets are fixed
testWithRandomReconcileOrderFromConstantSeed := false
testWithRandomReconcileOrderFromRandomSeed := false
testWithRandomReconcileOrderFromConstantSeed := true
testWithRandomReconcileOrderFromRandomSeed := true

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -260,11 +261,9 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) {
if testWithPredictableReconcileOrder {
tt.maxIterations = 50
tt.randomControllerOrder = false
if tt.logAndGoldenFileName == "" {
tt.logAndGoldenFileName = strings.ToLower(tt.name)
}
tt.logAndGoldenFileName = strings.ToLower(tt.name)
t.Run("default", func(t *testing.T) {
runTestCase(ctx, t, tt)
runRolloutRollingTestCase(ctx, t, tt)
})
}

Expand All @@ -273,13 +272,9 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) {
tt.name = fmt.Sprintf("%s, random(0)", name)
tt.randomControllerOrder = true
tt.seed = 0
// TODO(in-place): drop the following line as soon as issue with scale down are fixed
tt.skipLogToFileAndGoldenFileCheck = true
if tt.logAndGoldenFileName == "" {
tt.logAndGoldenFileName = strings.ToLower(tt.name)
}
tt.logAndGoldenFileName = strings.ToLower(tt.name)
t.Run("random(0)", func(t *testing.T) {
runTestCase(ctx, t, tt)
runRolloutRollingTestCase(ctx, t, tt)
})
}

Expand All @@ -291,15 +286,15 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) {
tt.randomControllerOrder = true
tt.skipLogToFileAndGoldenFileCheck = true
t.Run(fmt.Sprintf("random(%d)", tt.seed), func(t *testing.T) {
runTestCase(ctx, t, tt)
runRolloutRollingTestCase(ctx, t, tt)
})
}
}
})
}
}

func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) {
func runRolloutRollingTestCase(ctx context.Context, t *testing.T, tt rolloutRollingSequenceTestCase) {
t.Helper()
g := NewWithT(t)

Expand All @@ -310,7 +305,7 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase)
// Init current and desired state from test case
current := tt.currentScope.Clone()
if current == nil {
current = initCurrentRolloutScope(tt)
current = initCurrentRolloutScope(tt.currentMachineNames, withRolloutStrategy(tt.maxSurge, tt.maxUnavailable))
}
desired := computeDesiredRolloutScope(current, tt.desiredMachineNames)

Expand All @@ -324,12 +319,15 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase)
i := 1
maxIterations := tt.maxIterations
for {
taskOrder := defaultTaskOrder(current)
taskList := getTaskListRolloutRolling(current)
taskCount := len(taskList)
taskOrder := defaultTaskOrder(taskCount)
if tt.randomControllerOrder {
taskOrder = randomTaskOrder(current, rng)
taskOrder = randomTaskOrder(taskCount, rng)
}
for _, taskID := range taskOrder {
if taskID == 0 {
task := taskList[taskID]
if task == "md" {
fLogger.Logf("[MD controller] Iteration %d, Reconcile md", i)
fLogger.Logf("[MD controller] - Input to rollout planner\n%s", current)

Expand All @@ -339,8 +337,9 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase)
p.newMS = current.newMS()
p.oldMSs = current.oldMSs()

err := p.Plan(ctx)
err := p.planRolloutRolling(ctx)
g.Expect(err).ToNot(HaveOccurred())

// Apply changes.
for _, ms := range current.machineSets {
if scaleIntent, ok := p.scaleIntents[ms.Name]; ok {
Expand Down Expand Up @@ -383,8 +382,8 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase)

// Run mutators faking other controllers
for _, ms := range current.machineSets {
if fmt.Sprintf("ms%d", taskID) == ms.Name {
fLogger.Logf("[MS controller] Iteration %d, Reconcile ms%d, %s", i, taskID, msLog(ms, current.machineSetMachines[ms.Name]))
if ms.Name == task {
fLogger.Logf("[MS controller] Iteration %d, Reconcile %s, %s", i, ms.Name, msLog(ms, current.machineSetMachines[ms.Name]))
machineSetControllerMutator(fLogger, ms, current)
break
}
Expand Down Expand Up @@ -486,15 +485,14 @@ type rolloutScope struct {
machineUID int32
}

// Init creates current state and desired state for rolling out a md from currentMachines to wantMachineNames.
func initCurrentRolloutScope(tt rolloutSequenceTestCase) (current *rolloutScope) {
func initCurrentRolloutScope(currentMachineNames []string, mdOptions ...machineDeploymentOption) (current *rolloutScope) {
// create current state, with a MD with
// - given MaxSurge, MaxUnavailable
// - replica counters assuming all the machines are at stable state
// - spec different from the MachineSets and Machines we are going to create down below (to simulate a change that triggers a rollout, but it is not yet started)
mdReplicaCount := int32(len(tt.currentMachineNames))
mdReplicaCount := int32(len(currentMachineNames))
current = &rolloutScope{
machineDeployment: createMD("v2", mdReplicaCount, tt.maxSurge, tt.maxUnavailable),
machineDeployment: createMD("v2", mdReplicaCount, mdOptions...),
}

// Create current MS, with
Expand All @@ -507,13 +505,12 @@ func initCurrentRolloutScope(tt rolloutSequenceTestCase) (current *rolloutScope)
// - spec at stable state (rollout is not yet propagated to machines)
var totMachines int32
currentMachines := []*clusterv1.Machine{}
for _, machineSetMachineName := range tt.currentMachineNames {
for _, machineSetMachineName := range currentMachineNames {
totMachines++
currentMachines = append(currentMachines, createM(machineSetMachineName, ms.Name, ms.Spec.Template.Spec.FailureDomain))
}
current.machineSetMachines = map[string][]*clusterv1.Machine{}
current.machineSetMachines[ms.Name] = currentMachines

current.machineUID = totMachines

// TODO(in-place): this should be removed as soon as rolloutPlanner will take care of creating newMS
Expand Down Expand Up @@ -835,20 +832,30 @@ func maxSurgeToleration() func(log *fileLogger, _ int, _ *rolloutScope, _, _ int
}
}

// getTaskListRolloutRolling returns the names of the reconcile tasks for the
// given scope: "md" first, then the name of every MachineSet in
// current.machineSets (in slice order), and finally one extra "ms<N+1>" entry,
// where N is the number of MachineSets currently in the scope.
func getTaskListRolloutRolling(current *rolloutScope) []string {
	msCount := len(current.machineSets)

	// One slot for the md, one per existing MachineSet, one for the extra entry.
	tasks := make([]string, 0, msCount+2)
	tasks = append(tasks, "md")
	for i := range current.machineSets {
		tasks = append(tasks, current.machineSets[i].Name)
	}

	// The trailing entry is for the MachineSet that might be created when reconciling md.
	return append(tasks, fmt.Sprintf("ms%d", msCount+1))
}

// defaultTaskOrder ensures the controllers are run in a consistent and predictable order: md, ms1, ms2 and so on.
func defaultTaskOrder(current *rolloutScope) []int {
func defaultTaskOrder(taskCount int) []int {
taskOrder := []int{}
for t := range len(current.machineSets) + 1 + 1 { // +1 is for the MachineSet that might be created when reconciling md, +1 is for the md itself
for t := range taskCount {
taskOrder = append(taskOrder, t)
}
return taskOrder
}

func randomTaskOrder(current *rolloutScope, rng *rand.Rand) []int {
func randomTaskOrder(taskCount int, rng *rand.Rand) []int {
u := &UniqueRand{
rng: rng,
generated: map[int]bool{},
max: len(current.machineSets) + 1 + 1, // +1 is for the MachineSet that might be created when reconciling md, +1 is for the md itself
max: taskCount,
}
taskOrder := []int{}
for !u.Done() {
Expand All @@ -864,32 +871,47 @@ func randomTaskOrder(current *rolloutScope, rng *rand.Rand) []int {
return taskOrder
}

func createMD(failureDomain string, replicas int32, maxSurge, maxUnavailable int32) *clusterv1.MachineDeployment {
return &clusterv1.MachineDeployment{
// machineDeploymentOption mutates a MachineDeployment; createMD applies the
// options it receives, in order, to the object it builds before returning it.
type machineDeploymentOption func(md *clusterv1.MachineDeployment)

// withRolloutStrategy returns an option that replaces md.Spec.Rollout.Strategy
// with a RollingUpdate strategy using the given maxSurge and maxUnavailable,
// both stored as integer IntOrString values.
//
// The return type is the named machineDeploymentOption so this helper matches
// the other option constructors in this file; the value is still assignable to
// a plain func(*clusterv1.MachineDeployment).
func withRolloutStrategy(maxSurge, maxUnavailable int32) machineDeploymentOption {
	return func(md *clusterv1.MachineDeployment) {
		// The whole strategy struct is overwritten, not merged with what was there.
		md.Spec.Rollout.Strategy = clusterv1.MachineDeploymentRolloutStrategy{
			Type: clusterv1.RollingUpdateMachineDeploymentStrategyType,
			RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{
				MaxSurge:       ptr.To(intstr.FromInt32(maxSurge)),
				MaxUnavailable: ptr.To(intstr.FromInt32(maxUnavailable)),
			},
		}
	}
}

func createMD(failureDomain string, replicas int32, options ...machineDeploymentOption) *clusterv1.MachineDeployment {
md := &clusterv1.MachineDeployment{
ObjectMeta: metav1.ObjectMeta{Name: "md"},
Spec: clusterv1.MachineDeploymentSpec{
// Note: using failureDomain as a template field to determine upToDate
Template: clusterv1.MachineTemplateSpec{Spec: clusterv1.MachineSpec{FailureDomain: failureDomain}},
Replicas: &replicas,
Rollout: clusterv1.MachineDeploymentRolloutSpec{
Strategy: clusterv1.MachineDeploymentRolloutStrategy{
Type: clusterv1.RollingUpdateMachineDeploymentStrategyType,
RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{
MaxSurge: ptr.To(intstr.FromInt32(maxSurge)),
MaxUnavailable: ptr.To(intstr.FromInt32(maxUnavailable)),
},
},
},
},
Status: clusterv1.MachineDeploymentStatus{
Replicas: &replicas,
AvailableReplicas: &replicas,
},
}
for _, opt := range options {
opt(md)
}
return md
}

func createMS(name, failureDomain string, replicas int32) *clusterv1.MachineSet {
return &clusterv1.MachineSet{
// withStatusAvailableReplicas returns a MachineSet option that sets
// ms.Status.AvailableReplicas to a pointer holding r.
func withStatusAvailableReplicas(r int32) fakeMachineSetOption {
	var setAvailable fakeMachineSetOption = func(ms *clusterv1.MachineSet) {
		// ptr.To is called on every application of the option.
		ms.Status.AvailableReplicas = ptr.To(r)
	}
	return setAvailable
}

func createMS(name, failureDomain string, replicas int32, opts ...fakeMachineSetOption) *clusterv1.MachineSet {
ms := &clusterv1.MachineSet{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Expand All @@ -903,6 +925,10 @@ func createMS(name, failureDomain string, replicas int32) *clusterv1.MachineSet
AvailableReplicas: ptr.To(replicas),
},
}
for _, opt := range opts {
opt(ms)
}
return ms
}

func createM(name, ownedByMS, failureDomain string) *clusterv1.Machine {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
## Regular rollout, 12 Replicas, maxSurge 3, maxUnavailable 1, scale down to 6, random(0)

[Test] Initial state
md, 6/6 replicas
- ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12)
- ms2, 3/3 replicas (m13,m14,m15)
[Test] Rollout 12 replicas, MaxSurge=3, MaxUnavailable=1, random(0)
[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m13,m14,m15)
[MS controller] Iteration 1, Reconcile ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12)
[MD controller] Iteration 1, Reconcile md
[MD controller] - Input to rollout planner
md, 6/6 replicas
- ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12)
- ms2, 3/3 replicas (m13,m14,m15)
[MD controller] - Result of rollout planner
md, 12/6 replicas
- ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12)
- ms2, 3/3 replicas (m13,m14,m15)
[Toleration] tolerate maxSurge breach
[MS controller] Iteration 2, Reconcile ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12)
[MS controller] - ms1 scale down to 2/2 replicas (m4,m5,m6,m7,m8,m9,m10 deleted)
[MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m11,m12)
[MD controller] Iteration 3, Reconcile md
[MD controller] - Input to rollout planner
md, 12/6 replicas
- ms1, 2/2 replicas (m11,m12)
- ms2, 3/3 replicas (m13,m14,m15)
[MD controller] - Result of rollout planner
md, 5/6 replicas
- ms1, 2/2 replicas (m11,m12)
- ms2, 3/6 replicas (m13,m14,m15)
[MS controller] Iteration 3, Reconcile ms2, 3/6 replicas (m13,m14,m15)
[MS controller] - ms2 scale up to 6/6 replicas (m16,m17,m18 created)
[MD controller] Iteration 4, Reconcile md
[MD controller] - Input to rollout planner
md, 5/6 replicas
- ms1, 2/2 replicas (m11,m12)
- ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18)
[MD controller] - Result of rollout planner
md, 8/6 replicas
- ms1, 2/0 replicas (m11,m12)
- ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18)
[MS controller] Iteration 4, Reconcile ms1, 2/0 replicas (m11,m12)
[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 deleted)
[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18)
[MS controller] Iteration 5, Reconcile ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18)
[MD controller] Iteration 5, Reconcile md
[MD controller] - Input to rollout planner
md, 8/6 replicas
- ms1, 0/0 replicas ()
- ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18)
[MD controller] - Result of rollout planner
md, 6/6 replicas
- ms1, 0/0 replicas ()
- ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18)
[Test] Final state
md, 6/6 replicas
- ms1, 0/0 replicas ()
- ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18)
Loading