Skip to content

Commit e673a9b

Browse files
committed
Add code for reading unstructured clusterapi csidriver annotations
1 parent cb723d4 commit e673a9b

27 files changed

+903
-31
lines changed

cluster-autoscaler/cloudprovider/clusterapi/README.md

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,30 @@ metadata:
307307
> Please see the [Cluster API Book chapter on Metadata propagation](https://cluster-api.sigs.k8s.io/reference/api/metadata-propagation)
308308
> for more information.
309309

310+
311+
#### Pre-defined CSI driver information on nodes scaled from zero
312+
313+
To provide CSI driver information for scale from zero, the optional
314+
capacity annotation may be supplied as a comma-separated list of driver name
315+
and volume limit key/value pairs, as demonstrated in the example below:
316+
317+
```yaml
318+
apiVersion: cluster.x-k8s.io/v1alpha4
319+
kind: MachineDeployment
320+
metadata:
321+
annotations:
322+
cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size: "5"
323+
cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size: "0"
324+
capacity.cluster-autoscaler.kubernetes.io/memory: "128G"
325+
capacity.cluster-autoscaler.kubernetes.io/cpu: "16"
326+
capacity.cluster-autoscaler.kubernetes.io/csi-driver: "ebs.csi.aws.com=25,efs.csi.aws.com=16"
327+
```
328+
329+
> Note: The CSI driver information supplied through the capacity annotation
330+
> specifies which CSI drivers will be installed on nodes scaled from zero, along
331+
> with their respective volume limits. The format is `driver-name=volume-limit`
332+
> with multiple drivers separated by commas.
333+
310334
#### Per-NodeGroup autoscaling options
311335

312336
Custom autoscaling options per node group (MachineDeployment/MachinePool/MachineSet) can be specified as annotations with a common prefix:
@@ -328,14 +352,14 @@ metadata:
328352
cluster.x-k8s.io/autoscaling-options-maxnodeprovisiontime: "20m0s"
329353
```
330354

331-
#### CPU Architecture awareness for single-arch clusters
355+
#### CPU Architecture awareness for single-arch clusters
332356

333-
Users of single-arch non-amd64 clusters who are using scale from zero
357+
Users of single-arch non-amd64 clusters who are using scale from zero
334358
support should also set the `CAPI_SCALE_ZERO_DEFAULT_ARCH` environment variable
335359
to set the architecture of the nodes they want to default the node group templates to.
336-
The autoscaler will default to `amd64` if it is not set, and the node
337-
group templates may not match the nodes' architecture, specifically when
338-
the workload triggering the scale-up uses a node affinity predicate checking
360+
The autoscaler will default to `amd64` if it is not set, and the node
361+
group templates may not match the nodes' architecture, specifically when
362+
the workload triggering the scale-up uses a node affinity predicate checking
339363
for the node's architecture.
340364

341365
## Specifying a Custom Resource Group

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,8 +380,12 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
380380
if err != nil {
381381
return nil, err
382382
}
383+
csiNode := ng.scalableResource.InstanceCSINode()
383384

384385
nodeInfo := framework.NewNodeInfo(&node, resourceSlices, &framework.PodInfo{Pod: cloudprovider.BuildKubeProxy(ng.scalableResource.Name())})
386+
if csiNode != nil {
387+
nodeInfo.AddCSINode(csiNode)
388+
}
385389
return nodeInfo, nil
386390
}
387391

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,15 @@ import (
2727

2828
"github.com/stretchr/testify/assert"
2929
corev1 "k8s.io/api/core/v1"
30+
storagev1 "k8s.io/api/storage/v1"
3031
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3132
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3233
"k8s.io/apimachinery/pkg/util/wait"
3334
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
3435
"k8s.io/autoscaler/cluster-autoscaler/config"
3536
gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
3637
"k8s.io/client-go/tools/cache"
38+
"k8s.io/utils/ptr"
3739
)
3840

3941
const (
@@ -1500,6 +1502,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
15001502
expectedCapacity map[corev1.ResourceName]int64
15011503
expectedNodeLabels map[string]string
15021504
expectedResourceSlice testResourceSlice
1505+
expectedCSINode *storagev1.CSINode
15031506
}
15041507

15051508
testCases := []struct {
@@ -1650,6 +1653,49 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
16501653
},
16511654
},
16521655
},
1656+
{
1657+
name: "When the NodeGroup can scale from zero and CSI driver annotations are present, it creates CSINode with driver information",
1658+
nodeGroupAnnotations: map[string]string{
1659+
memoryKey: "2048Mi",
1660+
cpuKey: "2",
1661+
csiDriverKey: "ebs.csi.aws.com=25,efs.csi.aws.com=16",
1662+
},
1663+
config: testCaseConfig{
1664+
expectedErr: nil,
1665+
nodeLabels: map[string]string{
1666+
"kubernetes.io/os": "linux",
1667+
"kubernetes.io/arch": "amd64",
1668+
},
1669+
expectedCapacity: map[corev1.ResourceName]int64{
1670+
corev1.ResourceCPU: 2,
1671+
corev1.ResourceMemory: 2048 * 1024 * 1024,
1672+
corev1.ResourcePods: 110,
1673+
},
1674+
expectedNodeLabels: map[string]string{
1675+
"kubernetes.io/os": "linux",
1676+
"kubernetes.io/arch": "amd64",
1677+
"kubernetes.io/hostname": "random value",
1678+
},
1679+
expectedCSINode: &storagev1.CSINode{
1680+
Spec: storagev1.CSINodeSpec{
1681+
Drivers: []storagev1.CSINodeDriver{
1682+
{
1683+
Name: "ebs.csi.aws.com",
1684+
Allocatable: &storagev1.VolumeNodeResources{
1685+
Count: ptr.To(int32(25)),
1686+
},
1687+
},
1688+
{
1689+
Name: "efs.csi.aws.com",
1690+
Allocatable: &storagev1.VolumeNodeResources{
1691+
Count: ptr.To(int32(16)),
1692+
},
1693+
},
1694+
},
1695+
},
1696+
},
1697+
},
1698+
},
16531699
}
16541700

16551701
test := func(t *testing.T, testConfig *TestConfig, config testCaseConfig) {
@@ -1726,6 +1772,55 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
17261772
}
17271773
}
17281774
}
1775+
1776+
// Validate CSINode if expected
1777+
if config.expectedCSINode != nil {
1778+
if nodeInfo.CSINode == nil {
1779+
t.Errorf("Expected CSINode to be set, but got nil")
1780+
} else {
1781+
expectedDrivers := config.expectedCSINode.Spec.Drivers
1782+
gotDrivers := nodeInfo.CSINode.Spec.Drivers
1783+
if len(expectedDrivers) != len(gotDrivers) {
1784+
t.Errorf("Expected %d CSI drivers, but got %d", len(expectedDrivers), len(gotDrivers))
1785+
} else {
1786+
for i, expectedDriver := range expectedDrivers {
1787+
if i >= len(gotDrivers) {
1788+
t.Errorf("Expected driver at index %d but got only %d drivers", i, len(gotDrivers))
1789+
break
1790+
}
1791+
gotDriver := gotDrivers[i]
1792+
if expectedDriver.Name != gotDriver.Name {
1793+
t.Errorf("Expected CSI driver name at index %d to be %s, but got %s", i, expectedDriver.Name, gotDriver.Name)
1794+
}
1795+
if expectedDriver.Allocatable == nil {
1796+
if gotDriver.Allocatable != nil {
1797+
t.Errorf("Expected CSI driver Allocatable at index %d to be nil, but got non-nil", i)
1798+
}
1799+
} else {
1800+
if gotDriver.Allocatable == nil {
1801+
t.Errorf("Expected CSI driver Allocatable at index %d to be non-nil, but got nil", i)
1802+
} else {
1803+
if expectedDriver.Allocatable.Count == nil {
1804+
if gotDriver.Allocatable.Count != nil {
1805+
t.Errorf("Expected CSI driver Count at index %d to be nil, but got %d", i, *gotDriver.Allocatable.Count)
1806+
}
1807+
} else {
1808+
if gotDriver.Allocatable.Count == nil {
1809+
t.Errorf("Expected CSI driver Count at index %d to be %d, but got nil", i, *expectedDriver.Allocatable.Count)
1810+
} else if *expectedDriver.Allocatable.Count != *gotDriver.Allocatable.Count {
1811+
t.Errorf("Expected CSI driver Count at index %d to be %d, but got %d", i, *expectedDriver.Allocatable.Count, *gotDriver.Allocatable.Count)
1812+
}
1813+
}
1814+
}
1815+
}
1816+
}
1817+
}
1818+
}
1819+
} else {
1820+
if nodeInfo.CSINode != nil {
1821+
t.Errorf("Expected CSINode to be nil, but got non-nil with %d drivers", len(nodeInfo.CSINode.Spec.Drivers))
1822+
}
1823+
}
17291824
}
17301825

17311826
for _, tc := range testCases {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
apiv1 "k8s.io/api/core/v1"
3131
corev1 "k8s.io/api/core/v1"
3232
resourceapi "k8s.io/api/resource/v1"
33+
storagev1 "k8s.io/api/storage/v1"
3334
"k8s.io/apimachinery/pkg/api/resource"
3435
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3536
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -413,6 +414,27 @@ func (r unstructuredScalableResource) InstanceDRADriver() string {
413414
return parseDRADriver(r.unstructured.GetAnnotations())
414415
}
415416

417+
// InstanceCSINode parses CSI driver information from annotations and returns
418+
// a CSINode object with the list of installed drivers and their volume limits.
419+
// The annotation format is "driver-name=volume-limit,driver-name2=volume-limit2".
420+
// Returns nil if the annotation is not present or empty.
421+
func (r unstructuredScalableResource) InstanceCSINode() *storagev1.CSINode {
422+
annotations := r.unstructured.GetAnnotations()
423+
// annotation value of the form "driver1=limit1,driver2=limit2"
424+
if val, found := annotations[csiDriverKey]; found && val != "" {
425+
drivers := parseCSIDriverAnnotation(val)
426+
if len(drivers) == 0 {
427+
return nil
428+
}
429+
return &storagev1.CSINode{
430+
Spec: storagev1.CSINodeSpec{
431+
Drivers: drivers,
432+
},
433+
}
434+
}
435+
return nil
436+
}
437+
416438
func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) {
417439
// Cache w/ lazy loading of the infrastructure reference resource.
418440
r.infraMutex.RLock()
@@ -533,6 +555,66 @@ func systemInfoFromInfrastructureObject(infraobj *unstructured.Unstructured) api
533555
return nsi
534556
}
535557

558+
// parseCSIDriverAnnotation parses a comma-separated list of CSI driver name and volume limit
559+
// key/value pairs in the format "driver-name=volume-limit,driver-name2=volume-limit2".
560+
// Returns a slice of CSINodeDriver objects with Name and Allocatable.Count set.
561+
func parseCSIDriverAnnotation(annotationValue string) []storagev1.CSINodeDriver {
562+
drivers := []storagev1.CSINodeDriver{}
563+
if annotationValue == "" {
564+
return drivers
565+
}
566+
567+
driverSpecs := strings.Split(annotationValue, ",")
568+
for _, driverSpec := range driverSpecs {
569+
driverSpec = strings.TrimSpace(driverSpec)
570+
if driverSpec == "" {
571+
continue
572+
}
573+
574+
// Split on "=" to get driver name and volume limit
575+
parts := strings.SplitN(driverSpec, "=", 2)
576+
if len(parts) != 2 {
577+
klog.V(4).Infof("Invalid CSI driver spec format (expected driver-name=volume-limit): %s", driverSpec)
578+
continue
579+
}
580+
581+
driverName := strings.TrimSpace(parts[0])
582+
volumeLimitStr := strings.TrimSpace(parts[1])
583+
584+
if driverName == "" {
585+
klog.V(4).Infof("Empty driver name in CSI driver spec: %s", driverSpec)
586+
continue
587+
}
588+
589+
// Parse volume limit as integer
590+
volumeLimit, err := strconv.ParseInt(volumeLimitStr, 10, 32)
591+
if err != nil {
592+
klog.V(4).Infof("Invalid volume limit value (expected integer) in CSI driver spec %s: %v", driverSpec, err)
593+
continue
594+
}
595+
596+
if volumeLimit < 0 {
597+
klog.V(4).Infof("Volume limit must be non-negative in CSI driver spec: %s", driverSpec)
598+
continue
599+
}
600+
601+
// Create CSINodeDriver with Name and optionally Allocatable.Count
602+
// If volume limit is 0, Allocatable is not set
603+
driver := storagev1.CSINodeDriver{
604+
Name: driverName,
605+
}
606+
if volumeLimit > 0 {
607+
limit := int32(volumeLimit)
608+
driver.Allocatable = &storagev1.VolumeNodeResources{
609+
Count: &limit,
610+
}
611+
}
612+
drivers = append(drivers, driver)
613+
}
614+
615+
return drivers
616+
}
617+
536618
// adapted from https://github.com/kubernetes/kubernetes/blob/release-1.25/pkg/util/taints/taints.go#L39
537619
func parseTaint(st string) (apiv1.Taint, error) {
538620
var taint apiv1.Taint

0 commit comments

Comments
 (0)