Commit 88c31a9

reconcile ROSAMachinePool.spec.ProviderIDList

- sync replicas to CAPI MachinePool when autoscaling is enabled
- fixed ROSAMachinePool not reporting ready when autoscaling is enabled

1 parent f0a5ecf commit 88c31a9

File tree

7 files changed: +251 -23 lines changed

exp/controllers/rosamachinepool_controller.go

Lines changed: 77 additions & 18 deletions
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/service/ec2"
 	"github.com/blang/semver"
 	"github.com/google/go-cmp/cmp"
 	cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
@@ -15,6 +17,7 @@ import (
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
+	"k8s.io/utils/ptr"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
@@ -130,6 +133,7 @@ func (r *ROSAMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Requ
 		MachinePool:     machinePool,
 		RosaMachinePool: rosaMachinePool,
 		Logger:          log,
+		Endpoints:       r.Endpoints,
 	})
 	if err != nil {
 		return ctrl.Result{}, errors.Wrap(err, "failed to create scope")
@@ -198,6 +202,17 @@ func (r *ROSAMachinePoolReconciler) reconcileNormal(ctx context.Context,
 	}
 
 	rosaMachinePool := machinePoolScope.RosaMachinePool
+	machinePool := machinePoolScope.MachinePool
+
+	if rosaMachinePool.Spec.Autoscaling != nil && !annotations.ReplicasManagedByExternalAutoscaler(machinePool) {
+		// make sure cluster.x-k8s.io/replicas-managed-by annotation is set on the CAPI MachinePool when autoscaling is enabled.
+		annotations.AddAnnotations(machinePool, map[string]string{
+			clusterv1.ReplicasManagedByAnnotation: "rosa",
+		})
+		if err := machinePoolScope.PatchCAPIMachinePoolObject(ctx); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
 
 	nodePool, found, err := ocmClient.GetNodePool(machinePoolScope.ControlPlane.Status.ID, rosaMachinePool.Spec.NodePoolName)
 	if err != nil {
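A note on the annotation handshake above (editorial, hedged): upstream cluster-api's annotations.ReplicasManagedByExternalAutoscaler is understood to check only for the presence of the cluster.x-k8s.io/replicas-managed-by annotation, so the value "rosa" is informational rather than load-bearing. A minimal sketch of that assumed contract:

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// replicasExternallyManaged mirrors the assumed contract of the upstream
// helper: presence of the annotation (any value) opts the MachinePool out
// of CAPI-managed replica reconciliation.
func replicasExternallyManaged(o metav1.Object) bool {
	_, ok := o.GetAnnotations()[clusterv1.ReplicasManagedByAnnotation]
	return ok
}

func main() {
	obj := &metav1.ObjectMeta{
		Annotations: map[string]string{clusterv1.ReplicasManagedByAnnotation: "rosa"},
	}
	fmt.Println(replicasExternallyManaged(obj)) // true
}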
@@ -210,9 +225,25 @@ func (r *ROSAMachinePoolReconciler) reconcileNormal(ctx context.Context,
 		return ctrl.Result{}, fmt.Errorf("failed to ensure rosaMachinePool: %w", err)
 	}
 
-	// TODO (alberto): discover and store providerIDs from aws so the CAPI controller can match then to Nodes and report readiness.
-	rosaMachinePool.Status.Replicas = int32(nodePool.Status().CurrentReplicas())
-	if nodePool.Replicas() == nodePool.Status().CurrentReplicas() && nodePool.Status().Message() == "" {
+	currentReplicas := int32(nodePool.Status().CurrentReplicas())
+	if annotations.ReplicasManagedByExternalAutoscaler(machinePool) {
+		// Set MachinePool replicas to rosa autoscaling replicas
+		if *machinePool.Spec.Replicas != currentReplicas {
+			machinePoolScope.Info("Setting MachinePool replicas to rosa autoscaling replicas",
+				"local", *machinePool.Spec.Replicas,
+				"external", currentReplicas)
+			machinePool.Spec.Replicas = &currentReplicas
+			if err := machinePoolScope.PatchCAPIMachinePoolObject(ctx); err != nil {
+				return ctrl.Result{}, err
+			}
+		}
+	}
+	if err := r.reconcileProviderIDList(ctx, machinePoolScope, nodePool); err != nil {
+		return ctrl.Result{}, fmt.Errorf("failed to reconcile ProviderIDList: %w", err)
+	}
+
+	rosaMachinePool.Status.Replicas = currentReplicas
+	if rosa.IsNodePoolReady(nodePool) {
 		conditions.MarkTrue(rosaMachinePool, expinfrav1.RosaMachinePoolReadyCondition)
 		rosaMachinePool.Status.Ready = true
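One caveat on the sync block above: *machinePool.Spec.Replicas is dereferenced without a nil check, which relies on CAPI's defaulting webhook having populated spec.replicas (assumed default 1). A defensive variant, with desiredReplicas as a hypothetical helper that is not part of this commit:

import expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"

// desiredReplicas shows the defensive form: tolerate a nil spec.replicas
// instead of assuming the CAPI defaulting webhook has set it.
func desiredReplicas(mp *expclusterv1.MachinePool) int32 {
	if mp.Spec.Replicas == nil {
		return 1 // assumed CAPI default when unset
	}
	return *mp.Spec.Replicas
}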

@@ -234,7 +265,7 @@ func (r *ROSAMachinePoolReconciler) reconcileNormal(ctx context.Context,
 		return ctrl.Result{RequeueAfter: time.Second * 60}, nil
 	}
 
-	npBuilder := nodePoolBuilder(rosaMachinePool.Spec, machinePoolScope.MachinePool.Spec)
+	npBuilder := nodePoolBuilder(rosaMachinePool.Spec, machinePool.Spec)
 	nodePoolSpec, err := npBuilder.Build()
 	if err != nil {
 		return ctrl.Result{}, fmt.Errorf("failed to build rosa nodepool: %w", err)
@@ -294,20 +325,7 @@ func (r *ROSAMachinePoolReconciler) reconcileMachinePoolVersion(machinePoolScope
 	}
 
 	if scheduledUpgrade == nil {
-		policy, err := ocmClient.BuildNodeUpgradePolicy(version, nodePool.ID(), ocm.UpgradeScheduling{
-			AutomaticUpgrades: false,
-			// The OCM API places guardrails around the minimum and maximum delay that a user can request,
-			// for the next run of the upgrade, which is [5min,6mo]. Set our next run request to something
-			// slightly longer than 5min to make sure we account for the latency between when we send this
-			// request and when the server processes it.
-			// https://gitlab.cee.redhat.com/service/uhc-clusters-service/-/blob/master/cmd/clusters-service/servecmd/apiserver/upgrade_policy_handlers.go
-			NextRun: time.Now().Add(6 * time.Minute),
-		})
-		if err != nil {
-			return fmt.Errorf("failed to create nodePool upgrade schedule to version %s: %w", version, err)
-		}
-
-		scheduledUpgrade, err = ocmClient.ScheduleNodePoolUpgrade(clusterID, nodePool.ID(), policy)
+		scheduledUpgrade, err = rosa.ScheduleNodePoolUpgrade(ocmClient, clusterID, nodePool, version, time.Now())
 		if err != nil {
 			return fmt.Errorf("failed to schedule nodePool upgrade to version %s: %w", version, err)
 		}
@@ -453,6 +471,47 @@ func nodePoolToRosaMachinePoolSpec(nodePool *cmv1.NodePool) expinfrav1.RosaMachi
 	return spec
 }
 
+func (r *ROSAMachinePoolReconciler) reconcileProviderIDList(ctx context.Context, machinePoolScope *scope.RosaMachinePoolScope, nodePool *cmv1.NodePool) error {
+	tags := nodePool.AWSNodePool().Tags()
+	if len(tags) == 0 {
+		// can't identify EC2 instances belonging to this NodePool without tags.
+		return nil
+	}
+
+	ec2Svc := scope.NewEC2Client(machinePoolScope, machinePoolScope, &machinePoolScope.Logger, machinePoolScope.InfraCluster())
+	response, err := ec2Svc.DescribeInstancesWithContext(ctx, &ec2.DescribeInstancesInput{
+		Filters: buildEC2FiltersFromTags(tags),
+	})
+	if err != nil {
+		return err
+	}
+
+	var providerIDList []string
+	for _, reservation := range response.Reservations {
+		for _, instance := range reservation.Instances {
+			providerID := scope.GenerateProviderID(*instance.Placement.AvailabilityZone, *instance.InstanceId)
+			providerIDList = append(providerIDList, providerID)
+		}
+	}
+
+	machinePoolScope.RosaMachinePool.Spec.ProviderIDList = providerIDList
+	return nil
+}
+
+func buildEC2FiltersFromTags(tags map[string]string) []*ec2.Filter {
+	// allocate with zero length (capacity len(tags)) so append doesn't leave nil entries at the front.
+	filters := make([]*ec2.Filter, 0, len(tags))
+	for key, value := range tags {
+		filters = append(filters, &ec2.Filter{
+			Name: ptr.To(fmt.Sprintf("tag:%s", key)),
+			Values: aws.StringSlice([]string{
+				value,
+			}),
+		})
+	}
+
+	return filters
+}
+
 func rosaControlPlaneToRosaMachinePoolMapFunc(c client.Client, gvk schema.GroupVersionKind, log logger.Wrapper) handler.MapFunc {
 	return func(ctx context.Context, o client.Object) []reconcile.Request {
 		rosaControlPlane, ok := o.(*rosacontrolplanev1.ROSAControlPlane)
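An editorial caveat on reconcileProviderIDList: DescribeInstances is a paginated API, so a single DescribeInstancesWithContext call returns at most one page, and a very large pool could be truncated. A hedged sketch of a paging variant for the body of that function, using aws-sdk-go's standard pagination helper:

// Paging variant (sketch; same filters and providerID construction as above).
var providerIDList []string
err := ec2Svc.DescribeInstancesPagesWithContext(ctx, &ec2.DescribeInstancesInput{
	Filters: buildEC2FiltersFromTags(tags),
}, func(page *ec2.DescribeInstancesOutput, lastPage bool) bool {
	for _, reservation := range page.Reservations {
		for _, instance := range reservation.Instances {
			providerIDList = append(providerIDList,
				scope.GenerateProviderID(*instance.Placement.AvailabilityZone, *instance.InstanceId))
		}
	}
	return true // keep walking pages until the last one
})
if err != nil {
	return err
}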

pkg/cloud/scope/machine.go

Lines changed: 1 addition & 2 deletions
@@ -19,7 +19,6 @@ package scope
 import (
 	"context"
 	"encoding/base64"
-	"fmt"
 
 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
@@ -142,7 +141,7 @@ func (m *MachineScope) GetProviderID() string {
 
 // SetProviderID sets the AWSMachine providerID in spec.
 func (m *MachineScope) SetProviderID(instanceID, availabilityZone string) {
-	providerID := fmt.Sprintf("aws:///%s/%s", availabilityZone, instanceID)
+	providerID := GenerateProviderID(availabilityZone, instanceID)
 	m.AWSMachine.Spec.ProviderID = ptr.To[string](providerID)
 }

pkg/cloud/scope/providerid.go

Lines changed: 12 additions & 0 deletions
@@ -17,6 +17,7 @@ limitations under the License.
 package scope
 
 import (
+	"fmt"
 	"regexp"
 	"strings"
 
@@ -124,3 +125,14 @@ func (p *ProviderID) Validate() bool {
 func (p *ProviderID) IndexKey() string {
 	return p.String()
 }
+
+// ProviderIDPrefix is the prefix of AWS resource IDs used to form the Kubernetes provider ID.
+// NOTE: this format matches the two-slash format used in cloud-provider and cluster-autoscaler.
+const ProviderIDPrefix = "aws://"
+
+// GenerateProviderID generates a valid AWS Node/Machine ProviderID field.
+//
+// By default, the last ID provided is used as the identifier (last part).
+func GenerateProviderID(ids ...string) string {
+	return fmt.Sprintf("%s/%s", ProviderIDPrefix, strings.Join(ids, "/"))
+}
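For quick reference, a runnable sketch of the format GenerateProviderID produces (the zone and instance ID below are made-up values). This is the same two-slash form the legacy AWS cloud provider writes into a Node's .spec.providerID, which is what lets CAPI match machines to Nodes:

package main

import (
	"fmt"

	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
)

func main() {
	// "aws://" + "/" + joined ids => "aws:///us-east-1a/i-0123456789abcdef0"
	fmt.Println(scope.GenerateProviderID("us-east-1a", "i-0123456789abcdef0"))
}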

pkg/cloud/scope/providerid_test.go

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package scope
+
+import (
+	"testing"
+
+	. "github.com/onsi/gomega"
+)
+
+func TestGenerateProviderID(t *testing.T) {
+	testCases := []struct {
+		ids                []string
+		expectedProviderID string
+	}{
+		{
+			ids: []string{
+				"eu-west-1a",
+				"instance-id",
+			},
+			expectedProviderID: "aws:///eu-west-1a/instance-id",
+		},
+		{
+			ids: []string{
+				"eu-west-1a",
+				"test-id1",
+				"test-id2",
+				"instance-id",
+			},
+			expectedProviderID: "aws:///eu-west-1a/test-id1/test-id2/instance-id",
+		},
+	}
+
+	for _, tc := range testCases {
+		g := NewGomegaWithT(t)
+		providerID := GenerateProviderID(tc.ids...)
+
+		g.Expect(providerID).To(Equal(tc.expectedProviderID))
+	}
+}

pkg/cloud/scope/rosamachinepool.go

Lines changed: 51 additions & 2 deletions
@@ -19,13 +19,16 @@ package scope
 import (
 	"context"
 
+	awsclient "github.com/aws/aws-sdk-go/aws/client"
 	"github.com/pkg/errors"
 	"k8s.io/klog/v2"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
+	"sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
 	rosacontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/v2/controlplane/rosa/api/v1beta2"
 	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
 	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud"
+	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/throttle"
 	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
@@ -42,6 +45,8 @@ type RosaMachinePoolScopeParams struct {
 	RosaMachinePool *expinfrav1.ROSAMachinePool
 	MachinePool     *expclusterv1.MachinePool
 	ControllerName  string
+
+	Endpoints []ServiceEndpoint
 }
 
 // NewRosaMachinePoolScope creates a new Scope from the supplied parameters.
@@ -70,7 +75,7 @@ func NewRosaMachinePoolScope(params RosaMachinePoolScopeParams) (*RosaMachinePoo
 		return nil, errors.Wrap(err, "failed to init MachinePool patch helper")
 	}
 
-	return &RosaMachinePoolScope{
+	scope := &RosaMachinePoolScope{
 		Logger:          *params.Logger,
 		Client:          params.Client,
 		patchHelper:     ammpHelper,
@@ -81,9 +86,22 @@ func NewRosaMachinePoolScope(params RosaMachinePoolScopeParams) (*RosaMachinePoo
 		RosaMachinePool: params.RosaMachinePool,
 		MachinePool:     params.MachinePool,
 		controllerName:  params.ControllerName,
-	}, nil
+	}
+
+	session, serviceLimiters, err := sessionForClusterWithRegion(params.Client, scope, params.ControlPlane.Spec.Region, params.Endpoints, params.Logger)
+	if err != nil {
+		return nil, errors.Errorf("failed to create aws session: %v", err)
+	}
+
+	scope.session = session
+	scope.serviceLimiters = serviceLimiters
+
+	return scope, nil
 }
 
+var _ cloud.Session = &RosaMachinePoolScope{}
+var _ cloud.SessionMetadata = &RosaMachinePoolScope{}
+
 // RosaMachinePoolScope defines the basic context for an actuator to operate upon.
 type RosaMachinePoolScope struct {
 	logger.Logger
@@ -96,6 +114,9 @@ type RosaMachinePoolScope struct {
 	RosaMachinePool *expinfrav1.ROSAMachinePool
 	MachinePool     *expclusterv1.MachinePool
 
+	session         awsclient.ConfigProvider
+	serviceLimiters throttle.ServiceLimiters
+
 	controllerName string
 }
 
@@ -139,6 +160,34 @@ func (s *RosaMachinePoolScope) GetSetter() conditions.Setter {
 	return s.RosaMachinePool
 }
 
+// ServiceLimiter implements cloud.Session.
+func (s *RosaMachinePoolScope) ServiceLimiter(service string) *throttle.ServiceLimiter {
+	if sl, ok := s.serviceLimiters[service]; ok {
+		return sl
+	}
+	return nil
+}
+
+// Session implements cloud.Session.
+func (s *RosaMachinePoolScope) Session() awsclient.ConfigProvider {
+	return s.session
+}
+
+// IdentityRef implements cloud.SessionMetadata.
+func (s *RosaMachinePoolScope) IdentityRef() *v1beta2.AWSIdentityReference {
+	return s.ControlPlane.Spec.IdentityRef
+}
+
+// InfraClusterName implements cloud.SessionMetadata.
+func (s *RosaMachinePoolScope) InfraClusterName() string {
+	return s.ControlPlane.Name
+}
+
+// Namespace implements cloud.SessionMetadata.
+func (s *RosaMachinePoolScope) Namespace() string {
+	return s.Cluster.Namespace
+}
+
 // RosaMchinePoolReadyFalse marks the ready condition false using warning if error isn't
 // empty.
 func (s *RosaMachinePoolScope) RosaMchinePoolReadyFalse(reason string, err string) error {
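The two var _ lines added above are Go's compile-time interface assertions: if RosaMachinePoolScope ever stops satisfying cloud.Session or cloud.SessionMetadata, the package fails to build rather than failing at runtime. The pattern in isolation:

package main

import "fmt"

type session interface{ Region() string }

type scopeImpl struct{}

func (scopeImpl) Region() string { return "us-east-1" }

// Compile-time check: the assignment forces the compiler to verify the
// method set; deleting Region() above turns this line into a build error.
var _ session = scopeImpl{}

func main() { fmt.Println(scopeImpl{}.Region()) }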

pkg/rosa/helpers.go

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+package rosa
+
+import (
+	cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
+)
+
+// IsNodePoolReady checks whether the nodepool is provisioned and all replicas are available.
+// If autoscaling is enabled, the NodePool must have replicas >= autoscaling.MinReplica to be considered ready.
+func IsNodePoolReady(nodePool *cmv1.NodePool) bool {
+	if nodePool.Status().Message() != "" {
+		return false
+	}
+
+	if nodePool.Replicas() != 0 {
+		return nodePool.Replicas() == nodePool.Status().CurrentReplicas()
+	}
+
+	if nodePool.Autoscaling() != nil {
+		return nodePool.Status().CurrentReplicas() >= nodePool.Autoscaling().MinReplica()
+	}
+
+	return false
+}
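In short: a non-empty status message always means not ready; a fixed-size pool is ready when current replicas equal the desired count; an autoscaled pool (desired count unset, so Replicas() is 0) is ready once current replicas reach the autoscaler minimum. A hedged usage sketch, assuming the usual ocm-sdk-go builder API (method names not verified against a specific SDK version):

package main

import (
	"fmt"

	cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/rosa"
)

func main() {
	// Fixed-size pool: ready once all desired replicas are current.
	fixed, _ := cmv1.NewNodePool().
		Replicas(2).
		Status(cmv1.NewNodePoolStatus().CurrentReplicas(2)).
		Build()
	fmt.Println(rosa.IsNodePoolReady(fixed)) // true

	// Autoscaled pool: ready once current replicas reach MinReplica.
	autoscaled, _ := cmv1.NewNodePool().
		Autoscaling(cmv1.NewNodePoolAutoscaling().MinReplica(1).MaxReplica(3)).
		Status(cmv1.NewNodePoolStatus().CurrentReplicas(1)).
		Build()
	fmt.Println(rosa.IsNodePoolReady(autoscaled)) // true
}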
