Skip to content

Commit f5c2125

Browse files
authored
Merge pull request #8823 from dylanrhysscott/dscott-do-scale-to-zero
DigitalOcean: Implement TemplateNodeInfo in DO cloud provider CA to support scaling up from zero
2 parents ae03da4 + 0920b45 commit f5c2125

File tree

8 files changed

+223
-77
lines changed

8 files changed

+223
-77
lines changed

cluster-autoscaler/cloudprovider/digitalocean/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,24 @@ picks up the configuration from the API and adjusts the behavior accordingly.
2828
# Development
2929

3030
Make sure you're inside the root path of the [autoscaler
31-
repository](https://github.com/kubernetes/autoscaler)
31+
repository](https://github.com/kubernetes/autoscaler/cluster-autoscaler)
3232

3333
1.) Build the `cluster-autoscaler` binary:
3434

3535

3636
```
37-
make build-in-docker
37+
GOARCH=amd64 make build-in-docker
3838
```
3939

4040
2.) Build the docker image:
4141

4242
```
43-
docker build -t digitalocean/cluster-autoscaler:dev .
43+
docker build --platform linux/amd64 -f Dockerfile.amd64 -t digitalocean/cluster-autoscaler:dev .
4444
```
4545

4646

4747
3.) Push the docker image to Docker hub:
4848

4949
```
50-
docker push digitalocean/cluster-autoscaler:dev
50+
docker push digitalocean/cluster-autoscaler:dev
5151
```

cluster-autoscaler/cloudprovider/digitalocean/digitalocean_cloud_provider_test.go

Lines changed: 70 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,70 @@ import (
2929
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
3030
)
3131

32+
func defaultDOClientMock(clusterID string) *doClientMock {
33+
client := &doClientMock{}
34+
ctx := context.Background()
35+
36+
client.On("ListNodePools", ctx, clusterID, nil).Return(
37+
[]*godo.KubernetesNodePool{
38+
{
39+
ID: "1",
40+
Nodes: []*godo.KubernetesNode{
41+
{ID: "1", Status: &godo.KubernetesNodeStatus{State: "running"}},
42+
{ID: "2", Status: &godo.KubernetesNodeStatus{State: "running"}},
43+
},
44+
AutoScale: true,
45+
},
46+
{
47+
ID: "2",
48+
Nodes: []*godo.KubernetesNode{
49+
{ID: "3", Status: &godo.KubernetesNodeStatus{State: "deleting"}},
50+
{ID: "4", Status: &godo.KubernetesNodeStatus{State: "running"}},
51+
},
52+
AutoScale: true,
53+
},
54+
{
55+
ID: "3",
56+
Nodes: []*godo.KubernetesNode{
57+
{ID: "5", Status: &godo.KubernetesNodeStatus{State: "provisioning"}},
58+
{ID: "6", Status: &godo.KubernetesNodeStatus{State: "running"}},
59+
},
60+
AutoScale: true,
61+
},
62+
{
63+
ID: "4",
64+
Nodes: []*godo.KubernetesNode{
65+
{ID: "7", Status: &godo.KubernetesNodeStatus{State: "draining"}},
66+
{ID: "8", Status: &godo.KubernetesNodeStatus{State: "running"}},
67+
},
68+
AutoScale: false,
69+
},
70+
},
71+
&godo.Response{},
72+
nil,
73+
).Once()
74+
return client
75+
}
76+
77+
func setGetNodeTemplateMock(c *doClientMock, times int) *doClientMock {
78+
c.On("GetNodePoolTemplate", context.Background(), "123456", "").Return(&godo.KubernetesNodePoolTemplate{
79+
Template: &godo.KubernetesNodeTemplate{
80+
Labels: make(map[string]string),
81+
Capacity: &godo.KubernetesNodePoolResources{
82+
CPU: 1,
83+
Memory: "2048Mi",
84+
Pods: 110,
85+
},
86+
Allocatable: &godo.KubernetesNodePoolResources{
87+
CPU: 380,
88+
Memory: "1024MI",
89+
Pods: 110,
90+
},
91+
},
92+
}, &godo.Response{}, nil).Times(times)
93+
return c
94+
}
95+
3296
func testCloudProvider(t *testing.T, client *doClientMock) *digitaloceanCloudProvider {
3397
cfg := `{"cluster_id": "123456", "token": "123-123-123", "url": "https://api.digitalocean.com/v2", "version": "dev"}`
3498

@@ -38,47 +102,7 @@ func testCloudProvider(t *testing.T, client *doClientMock) *digitaloceanCloudPro
38102

39103
// fill the test provider with some example
40104
if client == nil {
41-
client = &doClientMock{}
42-
ctx := context.Background()
43-
44-
client.On("ListNodePools", ctx, manager.clusterID, nil).Return(
45-
[]*godo.KubernetesNodePool{
46-
{
47-
ID: "1",
48-
Nodes: []*godo.KubernetesNode{
49-
{ID: "1", Status: &godo.KubernetesNodeStatus{State: "running"}},
50-
{ID: "2", Status: &godo.KubernetesNodeStatus{State: "running"}},
51-
},
52-
AutoScale: true,
53-
},
54-
{
55-
ID: "2",
56-
Nodes: []*godo.KubernetesNode{
57-
{ID: "3", Status: &godo.KubernetesNodeStatus{State: "deleting"}},
58-
{ID: "4", Status: &godo.KubernetesNodeStatus{State: "running"}},
59-
},
60-
AutoScale: true,
61-
},
62-
{
63-
ID: "3",
64-
Nodes: []*godo.KubernetesNode{
65-
{ID: "5", Status: &godo.KubernetesNodeStatus{State: "provisioning"}},
66-
{ID: "6", Status: &godo.KubernetesNodeStatus{State: "running"}},
67-
},
68-
AutoScale: true,
69-
},
70-
{
71-
ID: "4",
72-
Nodes: []*godo.KubernetesNode{
73-
{ID: "7", Status: &godo.KubernetesNodeStatus{State: "draining"}},
74-
{ID: "8", Status: &godo.KubernetesNodeStatus{State: "running"}},
75-
},
76-
AutoScale: false,
77-
},
78-
},
79-
&godo.Response{},
80-
nil,
81-
).Once()
105+
client = defaultDOClientMock(manager.clusterID)
82106
}
83107

84108
manager.client = client
@@ -102,7 +126,10 @@ func TestDigitalOceanCloudProvider_Name(t *testing.T) {
102126
}
103127

104128
func TestDigitalOceanCloudProvider_NodeGroups(t *testing.T) {
105-
provider := testCloudProvider(t, nil)
129+
c := defaultDOClientMock("123456")
130+
c = setGetNodeTemplateMock(c, 3)
131+
132+
provider := testCloudProvider(t, c)
106133
err := provider.manager.Refresh()
107134
assert.NoError(t, err)
108135

@@ -124,7 +151,7 @@ func TestDigitalOceanCloudProvider_NodeGroupForNode(t *testing.T) {
124151
t.Run("success", func(t *testing.T) {
125152
client := &doClientMock{}
126153
ctx := context.Background()
127-
154+
client = setGetNodeTemplateMock(client, 2)
128155
client.On("ListNodePools", ctx, clusterID, nil).Return(
129156
[]*godo.KubernetesNodePool{
130157
{

cluster-autoscaler/cloudprovider/digitalocean/digitalocean_manager.go

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ var (
3535
)
3636

3737
type nodeGroupClient interface {
38+
// GetNodePoolTemplate returns the template for a given node pool - used in helping CA for scale up from zero simulations.
39+
GetNodePoolTemplate(ctx context.Context, clusterID string, nodePoolName string) (*godo.KubernetesNodePoolTemplate, *godo.Response, error)
40+
3841
// ListNodePools lists all the node pools found in a Kubernetes cluster.
3942
ListNodePools(ctx context.Context, clusterID string, opts *godo.ListOptions) ([]*godo.KubernetesNodePool, *godo.Response, error)
4043

@@ -147,17 +150,22 @@ func (m *Manager) Refresh() error {
147150
if !nodePool.AutoScale {
148151
continue
149152
}
150-
153+
nodePoolTemplateResponse, _, err := m.client.GetNodePoolTemplate(ctx, m.clusterID, nodePool.Name)
154+
klog.V(4).Infof("fetched template response - %v", nodePoolTemplateResponse)
155+
if err != nil {
156+
return err
157+
}
151158
klog.V(4).Infof("adding node pool: %q name: %s min: %d max: %d",
152159
nodePool.ID, nodePool.Name, nodePool.MinNodes, nodePool.MaxNodes)
153160

154161
group = append(group, &NodeGroup{
155-
id: nodePool.ID,
156-
clusterID: m.clusterID,
157-
client: m.client,
158-
nodePool: nodePool,
159-
minSize: nodePool.MinNodes,
160-
maxSize: nodePool.MaxNodes,
162+
id: nodePool.ID,
163+
clusterID: m.clusterID,
164+
client: m.client,
165+
nodePool: nodePool,
166+
nodePoolTemplate: nodePoolTemplateResponse,
167+
minSize: nodePool.MinNodes,
168+
maxSize: nodePool.MaxNodes,
161169
})
162170
}
163171

cluster-autoscaler/cloudprovider/digitalocean/digitalocean_manager_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func TestDigitalOceanManager_Refresh(t *testing.T) {
9090
assert.NoError(t, err)
9191

9292
client := &doClientMock{}
93+
client = setGetNodeTemplateMock(client, 4)
9394
ctx := context.Background()
9495

9596
client.On("ListNodePools", ctx, manager.clusterID, nil).Return(
@@ -147,6 +148,8 @@ func TestDigitalOceanManager_RefreshWithNodeSpec(t *testing.T) {
147148
assert.NoError(t, err)
148149

149150
client := &doClientMock{}
151+
client = setGetNodeTemplateMock(client, 4)
152+
150153
ctx := context.Background()
151154

152155
client.On("ListNodePools", ctx, manager.clusterID, nil).Return(

cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group.go

Lines changed: 92 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,25 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23+
"math/rand"
24+
"strings"
2325

2426
"github.com/digitalocean/godo"
2527
apiv1 "k8s.io/api/core/v1"
26-
28+
"k8s.io/apimachinery/pkg/api/resource"
29+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2730
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
2831
"k8s.io/autoscaler/cluster-autoscaler/config"
2932
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
33+
utilerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
34+
"k8s.io/kubernetes/pkg/util/taints"
3035
)
3136

3237
const (
33-
doksLabelNamespace = "doks.digitalocean.com"
34-
nodeIDLabel = doksLabelNamespace + "/node-id"
38+
doksLabelNamespace = "doks.digitalocean.com"
39+
nodeIDLabel = doksLabelNamespace + "/node-id"
40+
generatedWorkerNameSuffixLength = 6
41+
generatedWorkerNameCharset = "n38uc7mqfyxojrbwgea6tl2ps5kh4ivd01z9"
3542
)
3643

3744
var (
@@ -43,13 +50,13 @@ var (
4350
// configuration info and functions to control a set of nodes that have the
4451
// same capacity and set of labels.
4552
type NodeGroup struct {
	id        string          // DOKS node pool ID
	clusterID string          // ID of the cluster this pool belongs to
	client    nodeGroupClient // DigitalOcean API client used for pool operations
	nodePool  *godo.KubernetesNodePool // most recently fetched pool state
	// nodePoolTemplate caches the pool's node template (populated during
	// Manager.Refresh) so TemplateNodeInfo can simulate scale-up from zero
	// without an extra API call; may be nil.
	nodePoolTemplate *godo.KubernetesNodePoolTemplate
	minSize          int // minimum number of nodes allowed for this pool
	maxSize          int // maximum number of nodes allowed for this pool
}
5461

5562
// MaxSize returns maximum size of the node group.
@@ -213,7 +220,21 @@ func (n *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
213220
// that are started on the node by default, using manifest (most likely only
214221
// kube-proxy). Implementation optional.
215222
func (n *NodeGroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
216-
return nil, cloudprovider.ErrNotImplemented
223+
if n.nodePoolTemplate != nil {
224+
// Template has already been populated from cache - convert to node info and return
225+
tni, err := toNodeInfoTemplate(n.nodePoolTemplate)
226+
if err != nil {
227+
return nil, utilerrors.NewAutoscalerError(utilerrors.InternalError, err.Error())
228+
}
229+
return tni, nil
230+
}
231+
232+
// No template present in cache - attempt to fetch from API
233+
resp, _, err := n.client.GetNodePoolTemplate(context.TODO(), n.clusterID, n.nodePool.Name)
234+
if err != nil {
235+
return nil, utilerrors.NewAutoscalerError(utilerrors.InternalError, err.Error())
236+
}
237+
return toNodeInfoTemplate(resp)
217238
}
218239

219240
// Exist checks if the node group really exists on the cloud provider side.
@@ -292,3 +313,63 @@ func toInstanceStatus(nodeState *godo.KubernetesNodeStatus) *cloudprovider.Insta
292313

293314
return st
294315
}
316+
317+
func toNodeInfoTemplate(resp *godo.KubernetesNodePoolTemplate) (*framework.NodeInfo, error) {
318+
allocatable, err := parseToQuanitity(resp.Template.Allocatable.CPU, resp.Template.Allocatable.Pods, resp.Template.Allocatable.Memory)
319+
if err != nil {
320+
return nil, fmt.Errorf("failed to create allocatable resources - %s", err)
321+
}
322+
capacity, err := parseToQuanitity(resp.Template.Capacity.CPU, resp.Template.Capacity.Pods, resp.Template.Capacity.Memory)
323+
if err != nil {
324+
return nil, fmt.Errorf("failed to create capacity resources - %s", err)
325+
}
326+
addedTaints, _, err := taints.ParseTaints(resp.Template.Taints)
327+
if err != nil {
328+
return nil, fmt.Errorf("failed to parse taints from template - %s", err)
329+
}
330+
l := map[string]string{
331+
apiv1.LabelOSStable: cloudprovider.DefaultOS,
332+
apiv1.LabelArchStable: cloudprovider.DefaultArch,
333+
}
334+
335+
l = cloudprovider.JoinStringMaps(l, resp.Template.Labels)
336+
node := &apiv1.Node{
337+
ObjectMeta: metav1.ObjectMeta{
338+
Name: generateWorkerName(resp.Template.Name),
339+
Labels: l,
340+
},
341+
Spec: apiv1.NodeSpec{
342+
Taints: addedTaints,
343+
},
344+
Status: apiv1.NodeStatus{
345+
Allocatable: allocatable,
346+
Capacity: capacity,
347+
Phase: apiv1.NodeRunning,
348+
Conditions: cloudprovider.BuildReadyConditions(),
349+
},
350+
}
351+
return framework.NewNodeInfo(node, nil), nil
352+
}
353+
354+
func parseToQuanitity(cpu int64, pods int64, memory string) (apiv1.ResourceList, error) {
355+
c := resource.NewQuantity(cpu, resource.DecimalSI)
356+
p := resource.NewQuantity(pods, resource.DecimalSI)
357+
m, err := resource.ParseQuantity(memory)
358+
if err != nil {
359+
return nil, err
360+
}
361+
return apiv1.ResourceList{
362+
apiv1.ResourceCPU: *c,
363+
apiv1.ResourceMemory: m,
364+
apiv1.ResourcePods: *p,
365+
}, nil
366+
}
367+
368+
func generateWorkerName(poolName string) string {
369+
var b strings.Builder
370+
for i := 0; i < generatedWorkerNameSuffixLength; i++ {
371+
d := generatedWorkerNameCharset[rand.Intn(len(generatedWorkerNameCharset))]
372+
b.WriteByte(d)
373+
}
374+
return fmt.Sprintf("%s-%s", poolName, b.String())
375+
}

cluster-autoscaler/cloudprovider/digitalocean/digitalocean_node_group_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23+
"strings"
2324
"testing"
2425

2526
"github.com/digitalocean/godo"
@@ -442,6 +443,18 @@ func testNodeGroup(client nodeGroupClient, np *godo.KubernetesNodePool) *NodeGro
442443
}
443444
}
444445

446+
func TestGenerateWorkerName(t *testing.T) {
447+
t.Run("generate worker node name", func(t *testing.T) {
448+
prefix := "testpool"
449+
expectedLength := generatedWorkerNameSuffixLength
450+
g := generateWorkerName(prefix)
451+
parts := strings.Split(g, "-")
452+
assert.Equal(t, 2, len(parts), "incorrect number of components for generated worker name")
453+
assert.Equal(t, prefix, parts[0], "unexpected prefix in generated worker name")
454+
assert.Equal(t, expectedLength, len(parts[1]), "incorrect suffix length for generated worker name")
455+
})
456+
}
457+
445458
type doClientMock struct {
446459
mock.Mock
447460
}
@@ -460,3 +473,8 @@ func (m *doClientMock) DeleteNode(ctx context.Context, clusterID, poolID, nodeID
460473
args := m.Called(ctx, clusterID, poolID, nodeID, nil)
461474
return args.Get(0).(*godo.Response), args.Error(1)
462475
}
476+
477+
// GetNodePoolTemplate mocks nodeGroupClient.GetNodePoolTemplate, returning
// whatever the test registered via mock.On("GetNodePoolTemplate", ...).
func (m *doClientMock) GetNodePoolTemplate(ctx context.Context, clusterID string, nodePoolName string) (*godo.KubernetesNodePoolTemplate, *godo.Response, error) {
	args := m.Called(ctx, clusterID, nodePoolName)
	return args.Get(0).(*godo.KubernetesNodePoolTemplate), args.Get(1).(*godo.Response), args.Error(2)
}

0 commit comments

Comments
 (0)