Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions cluster-autoscaler/cloudprovider/clusterapi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,30 @@ metadata:
> Please see the [Cluster API Book chapter on Metadata propagation](https://cluster-api.sigs.k8s.io/reference/api/metadata-propagation)
> for more information.


#### Pre-defined csi driver information on nodes scaled from zero

To provide CSI driver information for scale from zero, the optional
capacity annotation may be supplied as a comma-separated list of driver name
and volume limit key/value pairs, as demonstrated in the example below:

```yaml
apiVersion: cluster.x-k8s.io/v1alpha4
kind: MachineDeployment
metadata:
annotations:
cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size: "5"
cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size: "0"
capacity.cluster-autoscaler.kubernetes.io/memory: "128G"
capacity.cluster-autoscaler.kubernetes.io/cpu: "16"
capacity.cluster-autoscaler.kubernetes.io/csi-driver: "ebs.csi.aws.com=25,efs.csi.aws.com=16"
```

> Note: The CSI driver information supplied through the capacity annotation
> specifies which CSI drivers will be installed on nodes scaled from zero, along
> with their respective volume limits. The format is `driver-name=volume-limit`
> with multiple drivers separated by commas.

#### Per-NodeGroup autoscaling options

Custom autoscaling options per node group (MachineDeployment/MachinePool/MachineSet) can be specified as annotations with a common prefix:
Expand All @@ -328,14 +352,14 @@ metadata:
cluster.x-k8s.io/autoscaling-options-maxnodeprovisiontime: "20m0s"
```

#### CPU Architecture awareness for single-arch clusters
#### CPU Architecture awareness for single-arch clusters

Users of single-arch non-amd64 clusters who are using scale from zero
Users of single-arch non-amd64 clusters who are using scale from zero
support should also set the `CAPI_SCALE_ZERO_DEFAULT_ARCH` environment variable
to set the architecture of the nodes they want to default the node group templates to.
The autoscaler will default to `amd64` if it is not set, and the node
group templates may not match the nodes' architecture, specifically when
the workload triggering the scale-up uses a node affinity predicate checking
The autoscaler will default to `amd64` if it is not set, and the node
group templates may not match the nodes' architecture, specifically when
the workload triggering the scale-up uses a node affinity predicate checking
for the node's architecture.

## Specifying a Custom Resource Group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,12 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
if err != nil {
return nil, err
}
csiNode := ng.scalableResource.InstanceCSINode()

nodeInfo := framework.NewNodeInfo(&node, resourceSlices, &framework.PodInfo{Pod: cloudprovider.BuildKubeProxy(ng.scalableResource.Name())})
if csiNode != nil {
nodeInfo.AddCSINode(csiNode)
}
return nodeInfo, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ import (

"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/client-go/tools/cache"
"k8s.io/utils/ptr"
)

const (
Expand Down Expand Up @@ -1500,6 +1502,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
expectedCapacity map[corev1.ResourceName]int64
expectedNodeLabels map[string]string
expectedResourceSlice testResourceSlice
expectedCSINode *storagev1.CSINode
}

testCases := []struct {
Expand Down Expand Up @@ -1650,6 +1653,49 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
},
},
},
{
name: "When the NodeGroup can scale from zero and CSI driver annotations are present, it creates CSINode with driver information",
nodeGroupAnnotations: map[string]string{
memoryKey: "2048Mi",
cpuKey: "2",
csiDriverKey: "ebs.csi.aws.com=25,efs.csi.aws.com=16",
},
config: testCaseConfig{
expectedErr: nil,
nodeLabels: map[string]string{
"kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
},
expectedCapacity: map[corev1.ResourceName]int64{
corev1.ResourceCPU: 2,
corev1.ResourceMemory: 2048 * 1024 * 1024,
corev1.ResourcePods: 110,
},
expectedNodeLabels: map[string]string{
"kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
"kubernetes.io/hostname": "random value",
},
expectedCSINode: &storagev1.CSINode{
Spec: storagev1.CSINodeSpec{
Drivers: []storagev1.CSINodeDriver{
{
Name: "ebs.csi.aws.com",
Allocatable: &storagev1.VolumeNodeResources{
Count: ptr.To(int32(25)),
},
},
{
Name: "efs.csi.aws.com",
Allocatable: &storagev1.VolumeNodeResources{
Count: ptr.To(int32(16)),
},
},
},
},
},
},
},
}

test := func(t *testing.T, testConfig *TestConfig, config testCaseConfig) {
Expand Down Expand Up @@ -1726,6 +1772,55 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
}
}
}

// Validate CSINode if expected
if config.expectedCSINode != nil {
if nodeInfo.CSINode == nil {
t.Errorf("Expected CSINode to be set, but got nil")
} else {
expectedDrivers := config.expectedCSINode.Spec.Drivers
gotDrivers := nodeInfo.CSINode.Spec.Drivers
if len(expectedDrivers) != len(gotDrivers) {
t.Errorf("Expected %d CSI drivers, but got %d", len(expectedDrivers), len(gotDrivers))
} else {
for i, expectedDriver := range expectedDrivers {
if i >= len(gotDrivers) {
t.Errorf("Expected driver at index %d but got only %d drivers", i, len(gotDrivers))
break
}
gotDriver := gotDrivers[i]
if expectedDriver.Name != gotDriver.Name {
t.Errorf("Expected CSI driver name at index %d to be %s, but got %s", i, expectedDriver.Name, gotDriver.Name)
}
if expectedDriver.Allocatable == nil {
if gotDriver.Allocatable != nil {
t.Errorf("Expected CSI driver Allocatable at index %d to be nil, but got non-nil", i)
}
} else {
if gotDriver.Allocatable == nil {
t.Errorf("Expected CSI driver Allocatable at index %d to be non-nil, but got nil", i)
} else {
if expectedDriver.Allocatable.Count == nil {
if gotDriver.Allocatable.Count != nil {
t.Errorf("Expected CSI driver Count at index %d to be nil, but got %d", i, *gotDriver.Allocatable.Count)
}
} else {
if gotDriver.Allocatable.Count == nil {
t.Errorf("Expected CSI driver Count at index %d to be %d, but got nil", i, *expectedDriver.Allocatable.Count)
} else if *expectedDriver.Allocatable.Count != *gotDriver.Allocatable.Count {
t.Errorf("Expected CSI driver Count at index %d to be %d, but got %d", i, *expectedDriver.Allocatable.Count, *gotDriver.Allocatable.Count)
}
}
}
}
}
}
}
} else {
if nodeInfo.CSINode != nil {
t.Errorf("Expected CSINode to be nil, but got non-nil with %d drivers", len(nodeInfo.CSINode.Spec.Drivers))
}
}
}

for _, tc := range testCases {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
apiv1 "k8s.io/api/core/v1"
corev1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
Expand Down Expand Up @@ -413,6 +414,27 @@ func (r unstructuredScalableResource) InstanceDRADriver() string {
return parseDRADriver(r.unstructured.GetAnnotations())
}

// InstanceCSINode parses CSI driver information from annotations and returns
// a CSINode object listing the installed drivers and their volume limits.
// The annotation value has the form "driver-name=volume-limit,driver-name2=volume-limit2".
// Returns nil when the annotation is absent, empty, or yields no valid drivers.
func (r unstructuredScalableResource) InstanceCSINode() *storagev1.CSINode {
	// annotation value of the form "driver1=limit1,driver2=limit2"
	val, found := r.unstructured.GetAnnotations()[csiDriverKey]
	if !found || val == "" {
		return nil
	}
	drivers := parseCSIDriverAnnotation(val)
	if len(drivers) == 0 {
		return nil
	}
	return &storagev1.CSINode{
		Spec: storagev1.CSINodeSpec{Drivers: drivers},
	}
}

func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) {
// Cache w/ lazy loading of the infrastructure reference resource.
r.infraMutex.RLock()
Expand Down Expand Up @@ -533,6 +555,66 @@ func systemInfoFromInfrastructureObject(infraobj *unstructured.Unstructured) api
return nsi
}

// parseCSIDriverAnnotation parses a comma-separated list of CSI driver name
// and volume limit key/value pairs in the format
// "driver-name=volume-limit,driver-name2=volume-limit2".
// Malformed entries, empty driver names, and non-integer or negative limits
// are skipped (logged at verbosity 4). A limit of 0 produces a driver entry
// without Allocatable set. Returns a slice of CSINodeDriver objects with
// Name and Allocatable.Count set.
func parseCSIDriverAnnotation(annotationValue string) []storagev1.CSINodeDriver {
	drivers := []storagev1.CSINodeDriver{}
	if annotationValue == "" {
		return drivers
	}

	for _, spec := range strings.Split(annotationValue, ",") {
		spec = strings.TrimSpace(spec)
		if spec == "" {
			continue
		}

		// Each entry must look like "name=limit"; Cut splits on the first "=".
		name, limitStr, ok := strings.Cut(spec, "=")
		if !ok {
			klog.V(4).Infof("Invalid CSI driver spec format (expected driver-name=volume-limit): %s", spec)
			continue
		}
		name = strings.TrimSpace(name)
		limitStr = strings.TrimSpace(limitStr)

		if name == "" {
			klog.V(4).Infof("Empty driver name in CSI driver spec: %s", spec)
			continue
		}

		limit, err := strconv.ParseInt(limitStr, 10, 32)
		if err != nil {
			klog.V(4).Infof("Invalid volume limit value (expected integer) in CSI driver spec %s: %v", spec, err)
			continue
		}
		if limit < 0 {
			klog.V(4).Infof("Volume limit must be non-negative in CSI driver spec: %s", spec)
			continue
		}

		driver := storagev1.CSINodeDriver{Name: name}
		// A zero limit means the driver is present but Allocatable is left unset.
		if limit > 0 {
			count := int32(limit)
			driver.Allocatable = &storagev1.VolumeNodeResources{
				Count: &count,
			}
		}
		drivers = append(drivers, driver)
	}

	return drivers
}

// adapted from https://github.com/kubernetes/kubernetes/blob/release-1.25/pkg/util/taints/taints.go#L39
func parseTaint(st string) (apiv1.Taint, error) {
var taint apiv1.Taint
Expand Down
Loading
Loading