Skip to content

Commit 8bfb7bc

Browse files
committed
implement availability sets for worker nodes
1 parent 5cbe352 commit 8bfb7bc

File tree

11 files changed

+291
-163
lines changed

11 files changed

+291
-163
lines changed

cloud/defaults.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,10 @@ func GenerateDataDiskName(machineName, nameSuffix string) string {
135135
return fmt.Sprintf("%s_%s", machineName, nameSuffix)
136136
}
137137

138-
// GenerateAvailabilitySetName generates the name of a availability set based on the cluster name and the node group
139-
// node group identifies the set of nodes that belong to this availability sets. Eg. control-plane
138+
// GenerateAvailabilitySetName generates the name of a availability set based on the cluster name and the node group.
139+
// node group identifies the set of nodes that belong to this availability set:
140+
// For control plane nodes, this will be `control-plane`.
141+
// For worker nodes, this will be the machine deployment name.
140142
func GenerateAvailabilitySetName(clusterName, nodeGroup string) string {
141143
return fmt.Sprintf("%s_%s-as", clusterName, nodeGroup)
142144
}

cloud/scope/cluster.go

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -269,21 +269,6 @@ func (s *ClusterScope) PrivateDNSSpec() *azure.PrivateDNSSpec {
269269
return spec
270270
}
271271

272-
// AvailabilitySetEnabled informs control plane machines they should be part of an Availability Set
273-
func (s *ClusterScope) AvailabilitySetEnabled() bool {
274-
return len(s.AvailabilitySetSpecs()) > 0
275-
}
276-
277-
// AvailabilitySetSpecs returns the availability set specs.
278-
func (s *ClusterScope) AvailabilitySetSpecs() []azure.AvailabilitySetSpec {
279-
if len(s.AzureCluster.Status.FailureDomains) == 0 {
280-
return []azure.AvailabilitySetSpec{{
281-
Name: azure.GenerateAvailabilitySetName(s.ClusterName(), azure.ControlPlaneNodeGroup),
282-
}}
283-
}
284-
return nil
285-
}
286-
287272
// Vnet returns the cluster Vnet.
288273
func (s *ClusterScope) Vnet() *infrav1.VnetSpec {
289274
return &s.AzureCluster.Spec.NetworkSpec.Vnet
@@ -400,6 +385,11 @@ func (s *ClusterScope) Location() string {
400385
return s.AzureCluster.Spec.Location
401386
}
402387

388+
// AvailabilitySetEnabled informs machines that they should be part of an Availability Set.
389+
func (s *ClusterScope) AvailabilitySetEnabled() bool {
390+
return len(s.AzureCluster.Status.FailureDomains) == 0
391+
}
392+
403393
// GenerateFQDN generates a fully qualified domain name, based on a hash, cluster name and cluster location.
404394
func (s *ClusterScope) GenerateFQDN(ipName string) string {
405395
h := fnv.New32a()

cloud/scope/machine.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,10 +328,19 @@ func (m *MachineScope) ProviderID() string {
328328

329329
// AvailabilitySet returns the availability set for this machine if available
330330
func (m *MachineScope) AvailabilitySet() (string, bool) {
331-
if m.IsControlPlane() && m.AvailabilitySetEnabled() {
331+
if !m.AvailabilitySetEnabled() {
332+
return "", false
333+
}
334+
335+
if m.IsControlPlane() {
332336
return azure.GenerateAvailabilitySetName(m.ClusterName(), azure.ControlPlaneNodeGroup), true
333337
}
334338

339+
// get machine deployment name from labels for machines that maybe part of a machine deployment.
340+
if mdName, ok := m.Machine.Labels[clusterv1.MachineDeploymentLabelName]; ok {
341+
return azure.GenerateAvailabilitySetName(m.ClusterName(), mdName), true
342+
}
343+
335344
return "", false
336345
}
337346

cloud/scope/managedcontrolplane.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,3 @@ func (s *ManagedControlPlaneScope) OutboundLBName(_ string) string {
241241
func (s *ManagedControlPlaneScope) OutboundPoolName(_ string) string {
242242
return "aksOutboundBackendPool" // hard-coded in aks
243243
}
244-
245-
// FailureDomains returns the failure domains
246-
func (s *ManagedControlPlaneScope) FailureDomains() clusterv1.FailureDomains {
247-
return clusterv1.FailureDomains{}
248-
}

cloud/services/availabilitysets/availabilitysets.go

Lines changed: 58 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"github.com/Azure/go-autorest/autorest/to"
2525
"github.com/go-logr/logr"
2626
"github.com/pkg/errors"
27-
"k8s.io/utils/pointer"
2827

2928
infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha3"
3029
azure "sigs.k8s.io/cluster-api-provider-azure/cloud"
@@ -37,7 +36,7 @@ import (
3736
type AvailabilitySetScope interface {
3837
logr.Logger
3938
azure.ClusterDescriber
40-
AvailabilitySetSpecs() []azure.AvailabilitySetSpec
39+
AvailabilitySet() (string, bool)
4140
}
4241

4342
// Service provides operations on azure resources
@@ -61,8 +60,8 @@ func (s *Service) Reconcile(ctx context.Context) error {
6160
ctx, span := tele.Tracer().Start(ctx, "availabilitysets.Service.Reconcile")
6261
defer span.End()
6362

64-
if len(s.Scope.AvailabilitySetSpecs()) == 0 {
65-
//noop if there are failure domains
63+
availabilitySetName, ok := s.Scope.AvailabilitySet()
64+
if !ok {
6665
return nil
6766
}
6867

@@ -81,33 +80,32 @@ func (s *Service) Reconcile(ctx context.Context) error {
8180
return errors.Wrapf(err, "failed to determine max fault domain count")
8281
}
8382

84-
for _, asSpec := range s.Scope.AvailabilitySetSpecs() {
85-
s.Scope.V(2).Info("creating availability set", "availability set", asSpec.Name)
86-
87-
asParams := compute.AvailabilitySet{
88-
Sku: &compute.Sku{
89-
Name: pointer.StringPtr(string(compute.Aligned)),
90-
},
91-
AvailabilitySetProperties: &compute.AvailabilitySetProperties{
92-
PlatformFaultDomainCount: pointer.Int32Ptr(int32(faultDomainCount)),
93-
},
94-
Tags: converters.TagsToMap(infrav1.Build(infrav1.BuildParams{
95-
ClusterName: s.Scope.ClusterName(),
96-
Lifecycle: infrav1.ResourceLifecycleOwned,
97-
Name: to.StringPtr(asSpec.Name),
98-
Role: to.StringPtr(infrav1.CommonRole),
99-
Additional: s.Scope.AdditionalTags(),
100-
})),
101-
Location: pointer.StringPtr(s.Scope.Location()),
102-
}
103-
104-
_, err := s.Client.CreateOrUpdate(ctx, s.Scope.ResourceGroup(), asSpec.Name, asParams)
105-
if err != nil {
106-
return errors.Wrapf(err, "failed to create availability set %s", asSpec.Name)
107-
}
108-
s.Scope.V(2).Info("successfully created availability set", "availability set", asSpec.Name)
83+
s.Scope.V(2).Info("creating availability set", "availability set", availabilitySetName)
84+
85+
asParams := compute.AvailabilitySet{
86+
Sku: &compute.Sku{
87+
Name: to.StringPtr(string(compute.Aligned)),
88+
},
89+
AvailabilitySetProperties: &compute.AvailabilitySetProperties{
90+
PlatformFaultDomainCount: to.Int32Ptr(int32(faultDomainCount)),
91+
},
92+
Tags: converters.TagsToMap(infrav1.Build(infrav1.BuildParams{
93+
ClusterName: s.Scope.ClusterName(),
94+
Lifecycle: infrav1.ResourceLifecycleOwned,
95+
Name: to.StringPtr(availabilitySetName),
96+
Role: to.StringPtr(infrav1.CommonRole),
97+
Additional: s.Scope.AdditionalTags(),
98+
})),
99+
Location: to.StringPtr(s.Scope.Location()),
100+
}
101+
102+
_, err = s.Client.CreateOrUpdate(ctx, s.Scope.ResourceGroup(), availabilitySetName, asParams)
103+
if err != nil {
104+
return errors.Wrapf(err, "failed to create availability set %s", availabilitySetName)
109105
}
110106

107+
s.Scope.V(2).Info("successfully created availability set", "availability set", availabilitySetName)
108+
111109
return nil
112110
}
113111

@@ -116,19 +114,38 @@ func (s *Service) Delete(ctx context.Context) error {
116114
ctx, span := tele.Tracer().Start(ctx, "availabilitysets.Service.Delete")
117115
defer span.End()
118116

119-
for _, asSpec := range s.Scope.AvailabilitySetSpecs() {
120-
s.Scope.V(2).Info("deleting availability set", "availability set", asSpec.Name)
121-
err := s.Client.Delete(ctx, s.Scope.ResourceGroup(), asSpec.Name)
122-
if err != nil && azure.ResourceNotFound(err) {
123-
// already deleted
124-
continue
125-
}
126-
if err != nil {
127-
return errors.Wrapf(err, "failed to delete availability set %s in resource group %s", asSpec.Name, s.Scope.ResourceGroup())
128-
}
129-
130-
s.Scope.V(2).Info("successfully delete availability set", "availability set", asSpec.Name)
117+
availabilitySetName, ok := s.Scope.AvailabilitySet()
118+
if !ok {
119+
return nil
120+
}
121+
122+
as, err := s.Client.Get(ctx, s.Scope.ResourceGroup(), availabilitySetName)
123+
if err != nil && azure.ResourceNotFound(err) {
124+
// already deleted
125+
return nil
131126
}
132127

128+
if err != nil {
129+
return errors.Wrapf(err, "failed to get availability set %s in resource group %s", availabilitySetName, s.Scope.ResourceGroup())
130+
}
131+
132+
// only delete when the availability set does not have any vms
133+
if as.AvailabilitySetProperties != nil && as.VirtualMachines != nil && len(*as.VirtualMachines) > 0 {
134+
return nil
135+
}
136+
137+
s.Scope.V(2).Info("deleting availability set", "availability set", availabilitySetName)
138+
err = s.Client.Delete(ctx, s.Scope.ResourceGroup(), availabilitySetName)
139+
if err != nil && azure.ResourceNotFound(err) {
140+
// already deleted
141+
return nil
142+
}
143+
144+
if err != nil {
145+
return errors.Wrapf(err, "failed to delete availability set %s in resource group %s", availabilitySetName, s.Scope.ResourceGroup())
146+
}
147+
148+
s.Scope.V(2).Info("successfully delete availability set", "availability set", availabilitySetName)
149+
133150
return nil
134151
}

cloud/services/availabilitysets/availabilitysets_test.go

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,17 @@ package availabilitysets
1919
import (
2020
"context"
2121
"errors"
22+
"testing"
2223

2324
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-06-30/compute"
2425
"github.com/Azure/go-autorest/autorest"
2526
"github.com/Azure/go-autorest/autorest/to"
2627
"github.com/golang/mock/gomock"
2728
. "github.com/onsi/gomega"
2829
"k8s.io/utils/pointer"
29-
azure "sigs.k8s.io/cluster-api-provider-azure/cloud"
3030
"sigs.k8s.io/cluster-api-provider-azure/cloud/services/resourceskus"
3131
gomockinternal "sigs.k8s.io/cluster-api-provider-azure/internal/test/matchers/gomock"
3232

33-
"testing"
34-
3533
"k8s.io/klog/klogr"
3634
"sigs.k8s.io/cluster-api-provider-azure/cloud/services/availabilitysets/mock_availabilitysets"
3735
)
@@ -48,8 +46,8 @@ func TestReconcileAvailabilitySets(t *testing.T) {
4846
expectedError: "",
4947
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
5048
s.V(gomock.AssignableToTypeOf(2)).MinTimes(2).Return(klogr.New())
49+
s.AvailabilitySet().Return("as-name", true)
5150
s.ResourceGroup().Return("my-rg")
52-
s.AvailabilitySetSpecs().Return([]azure.AvailabilitySetSpec{{Name: "as-name"}}).Times(2)
5351
s.ClusterName().Return("cl-name")
5452
s.AdditionalTags().Return(map[string]string{})
5553
s.Location().Return("test-location")
@@ -83,10 +81,10 @@ func TestReconcileAvailabilitySets(t *testing.T) {
8381
},
8482
},
8583
{
86-
name: "noop if there are no availability set specs",
84+
name: "noop if the machine does not need to be assigned an availability set (machines without a deployment)",
8785
expectedError: "",
8886
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
89-
s.AvailabilitySetSpecs().Return(nil)
87+
s.AvailabilitySet().Return("as-name", false)
9088
},
9189
setupSKUs: func(svc *Service) {
9290
},
@@ -96,8 +94,8 @@ func TestReconcileAvailabilitySets(t *testing.T) {
9694
expectedError: "failed to create availability set as-name: something went wrong",
9795
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
9896
s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New())
97+
s.AvailabilitySet().Return("as-name", true)
9998
s.ResourceGroup().Return("my-rg")
100-
s.AvailabilitySetSpecs().Return([]azure.AvailabilitySetSpec{{Name: "as-name"}}).Times(2)
10199
s.ClusterName().Return("cl-name")
102100
s.AdditionalTags().Return(map[string]string{})
103101
s.Location().Return("test-location")
@@ -164,47 +162,72 @@ func TestDeleteAvailabilitySets(t *testing.T) {
164162
expectedError: "",
165163
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
166164
s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New())
167-
s.ResourceGroup().Return("my-rg")
168-
s.AvailabilitySetSpecs().Return([]azure.AvailabilitySetSpec{{Name: "as-name"}})
165+
s.AvailabilitySet().Return("as-name", true)
166+
s.ResourceGroup().Return("my-rg").Times(2)
167+
m.Get(gomockinternal.AContext(), "my-rg", "as-name").
168+
Return(compute.AvailabilitySet{AvailabilitySetProperties: &compute.AvailabilitySetProperties{}}, nil)
169169
m.Delete(gomockinternal.AContext(), "my-rg", "as-name").Return(nil)
170170
},
171171
},
172172
{
173-
name: "noop if there are no availability sets",
173+
name: "noop if AvailabilitySet returns false",
174174
expectedError: "",
175175
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
176-
s.AvailabilitySetSpecs().Return(nil)
176+
s.AvailabilitySet().Return("as-name", false)
177177
},
178178
},
179179
{
180-
name: "returns error",
181-
expectedError: "failed to delete availability set as-name in resource group my-rg: something went wrong",
180+
name: "noop if availability set has vms",
181+
expectedError: "",
182182
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
183-
s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New())
184-
s.ResourceGroup().Return("my-rg").MinTimes(2)
185-
s.AvailabilitySetSpecs().Return([]azure.AvailabilitySetSpec{{Name: "as-name"}})
186-
m.Delete(gomockinternal.AContext(), "my-rg", "as-name").Return(errors.New("something went wrong"))
183+
s.AvailabilitySet().Return("as-name", true)
184+
s.ResourceGroup().Return("my-rg")
185+
m.Get(gomockinternal.AContext(), "my-rg", "as-name").Return(compute.AvailabilitySet{
186+
AvailabilitySetProperties: &compute.AvailabilitySetProperties{VirtualMachines: &[]compute.SubResource{
187+
{ID: to.StringPtr("vm-id")}}}}, nil)
187188
},
188189
},
189190
{
190-
name: "noop if availability set is not found",
191+
name: "noop if availability set is already deleted - get returns 404",
191192
expectedError: "",
192193
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
193194
s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New())
195+
s.AvailabilitySet().Return("as-name", true)
194196
s.ResourceGroup().Return("my-rg")
195-
s.AvailabilitySetSpecs().Return([]azure.AvailabilitySetSpec{{Name: "as-name"}})
196-
m.Delete(gomockinternal.AContext(), "my-rg", "as-name").Return(autorest.DetailedError{StatusCode: 404})
197+
m.Get(gomockinternal.AContext(), "my-rg", "as-name").Return(compute.AvailabilitySet{},
198+
autorest.DetailedError{StatusCode: 404})
197199
},
198200
},
199201
{
200-
name: "noop if availability set is not found, and continue to the next one",
202+
name: "noop if availability set is already deleted - delete returns 404",
201203
expectedError: "",
202204
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
203205
s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New())
204-
s.ResourceGroup().Return("my-rg").MinTimes(2)
205-
s.AvailabilitySetSpecs().Return([]azure.AvailabilitySetSpec{{Name: "as-name-not-found"}, {Name: "as-name"}})
206-
m.Delete(gomockinternal.AContext(), "my-rg", "as-name-not-found").Return(autorest.DetailedError{StatusCode: 404})
207-
m.Delete(gomockinternal.AContext(), "my-rg", "as-name").Return(nil)
206+
s.AvailabilitySet().Return("as-name", true)
207+
s.ResourceGroup().Return("my-rg").Times(2)
208+
m.Get(gomockinternal.AContext(), "my-rg", "as-name").Return(compute.AvailabilitySet{}, nil)
209+
m.Delete(gomockinternal.AContext(), "my-rg", "as-name").Return(autorest.DetailedError{StatusCode: 404})
210+
},
211+
},
212+
{
213+
name: "returns error when availability set get fails",
214+
expectedError: "failed to get availability set as-name in resource group my-rg: something went wrong",
215+
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
216+
s.AvailabilitySet().Return("as-name", true)
217+
s.ResourceGroup().Return("my-rg").Times(2)
218+
m.Get(gomockinternal.AContext(), "my-rg", "as-name").Return(compute.AvailabilitySet{},
219+
errors.New("something went wrong"))
220+
},
221+
},
222+
{
223+
name: "returns error when delete fails",
224+
expectedError: "failed to delete availability set as-name in resource group my-rg: something went wrong",
225+
expect: func(s *mock_availabilitysets.MockAvailabilitySetScopeMockRecorder, m *mock_availabilitysets.MockClientMockRecorder) {
226+
s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New())
227+
s.AvailabilitySet().Return("as-name", true)
228+
s.ResourceGroup().Return("my-rg").Times(3)
229+
m.Get(gomockinternal.AContext(), "my-rg", "as-name").Return(compute.AvailabilitySet{}, nil)
230+
m.Delete(gomockinternal.AContext(), "my-rg", "as-name").Return(errors.New("something went wrong"))
208231
},
209232
},
210233
}

cloud/services/availabilitysets/mock_availabilitysets/availabilitysets_mock.go

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)