Skip to content

Commit 20610e5

Browse files
mweibelk8s-infra-cherrypick-robot
authored andcommitted
fix(MachinePool): always update vmssState
Updates ScaleSets service to always update the scope's vmss state with the latest data. Previously, a long running operation would cause the vmssState to not be updated and delayed creation of AzureMachinePoolMachines until much later when that long running operation completed. This change avoids this and updates the vmssState to what is retrieved from the API at all times. The call does not make more API requests because the VMSS GET request was done anyway but its result ignored.
1 parent b2c3b97 commit 20610e5

File tree

2 files changed

+42
-37
lines changed

2 files changed

+42
-37
lines changed

azure/services/scalesets/scalesets.go

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ func (s *Service) Reconcile(ctx context.Context) (retErr error) {
9696
return errors.Errorf("%T is not of type ScaleSetSpec", spec)
9797
}
9898

99-
_, err := s.Client.Get(ctx, spec)
99+
result, err := s.Client.Get(ctx, spec)
100100
if err == nil {
101101
// We can only get the existing instances if the VMSS already exists
102102
scaleSetSpec.VMSSInstances, err = s.Client.ListInstances(ctx, spec.ResourceGroupName(), spec.ResourceName())
@@ -105,34 +105,51 @@ func (s *Service) Reconcile(ctx context.Context) (retErr error) {
105105
s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err)
106106
return err
107107
}
108+
if result != nil {
109+
if err := s.updateScopeState(ctx, result, scaleSetSpec); err != nil {
110+
return err
111+
}
112+
}
108113
} else if !azure.ResourceNotFound(err) {
109114
return errors.Wrapf(err, "failed to get existing VMSS")
110115
}
111116

112-
result, err := s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName)
117+
result, err = s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName)
113118
s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err)
114119

115120
if err == nil && result != nil {
116-
vmss, ok := result.(armcompute.VirtualMachineScaleSet)
117-
if !ok {
118-
return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result)
121+
if err := s.updateScopeState(ctx, result, scaleSetSpec); err != nil {
122+
return err
119123
}
124+
}
120125

121-
fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances)
122-
if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil {
123-
return errors.Wrap(err, "unable to reconcile VMSS replicas")
124-
}
126+
return err
127+
}
125128

126-
// Transform the VMSS resource representation to conform to the cloud-provider-azure representation
127-
providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID)
128-
if err != nil {
129-
return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID)
130-
}
131-
s.Scope.SetProviderID(providerID)
132-
s.Scope.SetVMSSState(&fetchedVMSS)
129+
// updateScopeState updates the scope's VMSS state and provider ID
130+
//
131+
// Code later in the reconciler uses scope's VMSS state for determining scale status and whether to create/delete
132+
// AzureMachinePoolMachines.
133+
// N.B.: before calling this function, make sure scaleSetSpec.VMSSInstances is updated to the latest state.
134+
func (s *Service) updateScopeState(ctx context.Context, result interface{}, scaleSetSpec *ScaleSetSpec) error {
135+
vmss, ok := result.(armcompute.VirtualMachineScaleSet)
136+
if !ok {
137+
return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result)
133138
}
134139

135-
return err
140+
fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances)
141+
if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil {
142+
return errors.Wrap(err, "unable to reconcile VMSS replicas")
143+
}
144+
145+
// Transform the VMSS resource representation to conform to the cloud-provider-azure representation
146+
providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID)
147+
if err != nil {
148+
return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID)
149+
}
150+
s.Scope.SetProviderID(providerID)
151+
s.Scope.SetVMSSState(&fetchedVMSS)
152+
return nil
136153
}
137154

138155
// Delete deletes a scale set asynchronously. Delete sends a DELETE request to Azure and if accepted without error,

azure/services/scalesets/scalesets_test.go

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -119,14 +119,14 @@ func TestReconcileVMSS(t *testing.T) {
119119
spec := getDefaultVMSSSpec()
120120
// Validate spec
121121
s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes()
122-
m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil)
122+
m.Get(gomockinternal.AContext(), &defaultSpec).Return(resultVMSS, nil)
123123
m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil)
124124
r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).Return(getResultVMSS(), nil)
125125
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)
126126

127-
s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil)
128-
s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID)
129-
s.SetVMSSState(&fetchedVMSS)
127+
s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil).Times(2)
128+
s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID).Times(2)
129+
s.SetVMSSState(&fetchedVMSS).Times(2)
130130
},
131131
},
132132
{
@@ -177,28 +177,16 @@ func TestReconcileVMSS(t *testing.T) {
177177
s.DefaultedAzureServiceReconcileTimeout().Return(reconciler.DefaultAzureServiceReconcileTimeout)
178178
spec := getDefaultVMSSSpec()
179179
s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes()
180-
m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil)
180+
m.Get(gomockinternal.AContext(), &defaultSpec).Return(resultVMSS, nil)
181181
m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil)
182182

183183
r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).
184184
Return(nil, internalError())
185185
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, internalError())
186-
},
187-
},
188-
{
189-
name: "failed to reconcile replicas",
190-
expectedError: "unable to reconcile VMSS replicas:.*#: Internal Server Error: StatusCode=500",
191-
expect: func(g *WithT, s *mock_scalesets.MockScaleSetScopeMockRecorder, r *mock_async.MockReconcilerMockRecorder, m *mock_scalesets.MockClientMockRecorder) {
192-
s.DefaultedAzureServiceReconcileTimeout().Return(reconciler.DefaultAzureServiceReconcileTimeout)
193-
spec := getDefaultVMSSSpec()
194-
s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes()
195-
m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil)
196-
m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil)
197186

198-
r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).Return(getResultVMSS(), nil)
199-
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)
200-
201-
s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(internalError())
187+
s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil)
188+
s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID)
189+
s.SetVMSSState(&fetchedVMSS)
202190
},
203191
},
204192
{

0 commit comments

Comments
 (0)