
Commit 20d7e08

Update code to enable CSI node awareness
1 parent b4a93df commit 20d7e08

File tree

24 files changed: +2256 −27 lines


cluster-autoscaler/Makefile

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ build:
 	@$(MAKE) build-arch-$(GOARCH)
 
 build-arch-%: clean-arch-%
-	$(ENVVAR) GOOS=$(GOOS) GOARCH=$* go build -o cluster-autoscaler-$* ${LDFLAGS_FLAG} ${TAGS_FLAG}
+	$(ENVVAR) GOOS=$(GOOS) GOARCH=$* go build -o cluster-autoscaler-$* -mod=vendor ${LDFLAGS_FLAG} ${TAGS_FLAG}
 
 test-build-tags:
 	@if [ -z "$(SUPPORTED_BUILD_TAGS)" ]; then \
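Note: the added -mod=vendor flag makes go build resolve dependencies from the repository's vendor/ directory instead of the module cache, so the binary is built against exactly the vendored package versions checked into the tree.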

cluster-autoscaler/cluster-autoscaler-code-analysis.org

Lines changed: 1820 additions & 0 deletions
Large diffs are not rendered by default.

cluster-autoscaler/config/autoscaling_options.go

Lines changed: 2 additions & 0 deletions
@@ -314,6 +314,8 @@ type AutoscalingOptions struct {
 	ForceDeleteFailedNodes bool
 	// DynamicResourceAllocationEnabled configures whether logic for handling DRA objects is enabled.
 	DynamicResourceAllocationEnabled bool
+	// CSINodeAwareSchedulingEnabled configures whether logic for handling CSINode objects is enabled.
+	CSINodeAwareSchedulingEnabled bool
 	// ClusterSnapshotParallelism is the maximum parallelism of cluster snapshot creation.
 	ClusterSnapshotParallelism int
 	// CheckCapacityProcessorInstance is the name of the processor instance.

cluster-autoscaler/config/flags/flags.go

Lines changed: 2 additions & 0 deletions
@@ -226,6 +226,7 @@ var (
 	forceDeleteLongUnregisteredNodes = flag.Bool("force-delete-unregistered-nodes", false, "Whether to enable force deletion of long unregistered nodes, regardless of the min size of the node group the belong to.")
 	forceDeleteFailedNodes = flag.Bool("force-delete-failed-nodes", false, "Whether to enable force deletion of failed nodes, regardless of the min size of the node group the belong to.")
 	enableDynamicResourceAllocation = flag.Bool("enable-dynamic-resource-allocation", false, "Whether logic for handling DRA (Dynamic Resource Allocation) objects is enabled.")
+	enableCSINodeAwareScheduling = flag.Bool("enable-csi-node-aware-scheduling", false, "Whether logic for handling CSINode objects is enabled.")
 	clusterSnapshotParallelism = flag.Int("cluster-snapshot-parallelism", 16, "Maximum parallelism of cluster snapshot creation.")
 	checkCapacityProcessorInstance = flag.String("check-capacity-processor-instance", "", "Name of the processor instance. Only ProvisioningRequests that define this name in their parameters with the key \"processorInstance\" will be processed by this CA instance. It only refers to check capacity ProvisioningRequests, but if not empty, best-effort atomic ProvisioningRequests processing is disabled in this instance. Not recommended: Until CA 1.35, ProvisioningRequests with this name as prefix in their class will be also processed.")
 	nodeDeletionCandidateTTL = flag.Duration("node-deletion-candidate-ttl", time.Duration(0), "Maximum time a node can be marked as removable before the marking becomes stale. This sets the TTL of Cluster-Autoscaler's state if the Cluste-Autoscaler deployment becomes inactive")
@@ -399,6 +400,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		ForceDeleteLongUnregisteredNodes: *forceDeleteLongUnregisteredNodes,
 		ForceDeleteFailedNodes:           *forceDeleteFailedNodes,
 		DynamicResourceAllocationEnabled: *enableDynamicResourceAllocation,
+		CSINodeAwareSchedulingEnabled:    *enableCSINodeAwareScheduling,
 		ClusterSnapshotParallelism:       *clusterSnapshotParallelism,
 		CheckCapacityProcessorInstance:   *checkCapacityProcessorInstance,
 		MaxInactivityTime:                *maxInactivityTimeFlag,
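Usage note: the feature is switched on by adding --enable-csi-node-aware-scheduling=true to the cluster-autoscaler command line; the flag defaults to false, so existing deployments keep the current behavior.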

cluster-autoscaler/context/autoscaling_context.go

Lines changed: 5 additions & 0 deletions
@@ -27,6 +27,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/expander"
 	processor_callbacks "k8s.io/autoscaler/cluster-autoscaler/processors/callbacks"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
+	csinodeprovider "k8s.io/autoscaler/cluster-autoscaler/simulator/csi/provider"
 	draprovider "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/provider"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
 	kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
@@ -65,6 +66,8 @@ type AutoscalingContext struct {
 	ProvisioningRequestScaleUpMode bool
 	// DraProvider is the provider for dynamic resources allocation.
 	DraProvider *draprovider.Provider
+	// CsiProvider is the provider for CSI node aware scheduling.
+	CsiProvider *csinodeprovider.Provider
 }
 
 // AutoscalingKubeClients contains all Kubernetes API clients,
@@ -112,6 +115,7 @@ func NewAutoscalingContext(
 	remainingPdbTracker pdb.RemainingPdbTracker,
 	clusterStateRegistry *clusterstate.ClusterStateRegistry,
 	draProvider *draprovider.Provider,
+	csiProvider *csinodeprovider.Provider,
 ) *AutoscalingContext {
 	return &AutoscalingContext{
 		AutoscalingOptions: options,
@@ -125,6 +129,7 @@ func NewAutoscalingContext(
 		RemainingPdbTracker:  remainingPdbTracker,
 		ClusterStateRegistry: clusterStateRegistry,
 		DraProvider:          draProvider,
+		CsiProvider:          csiProvider,
 	}
 }
 
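The csinodeprovider package itself is among the 24 changed files but is not rendered on this page. Below is a minimal sketch of what the Provider referenced above might look like, assuming it mirrors the existing DRA provider pattern; only the Provider type and its Snapshot() method are visible in the diffs on this page, while the lister-based constructor and the csisnapshot.NewSnapshot helper are assumptions.

// Hypothetical sketch only -- not the actual file from this commit.
package provider

import (
	"k8s.io/apimachinery/pkg/labels"
	storagev1listers "k8s.io/client-go/listers/storage/v1"

	csisnapshot "k8s.io/autoscaler/cluster-autoscaler/simulator/csi/snapshot"
)

// Provider builds point-in-time snapshots of the cluster's CSINode objects
// so that scheduling simulations can account for per-node volume limits.
type Provider struct {
	csiNodeLister storagev1listers.CSINodeLister // assumed data source
}

// NewProvider is an assumed constructor wiring in a CSINode lister.
func NewProvider(lister storagev1listers.CSINodeLister) *Provider {
	return &Provider{csiNodeLister: lister}
}

// Snapshot lists all CSINode objects and captures them for simulation.
// The call sites in this commit rely only on this method's signature.
func (p *Provider) Snapshot() (*csisnapshot.Snapshot, error) {
	csiNodes, err := p.csiNodeLister.List(labels.Everything())
	if err != nil {
		return nil, err
	}
	return csisnapshot.NewSnapshot(csiNodes), nil // NewSnapshot is assumed
}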

cluster-autoscaler/core/autoscaler.go

Lines changed: 2 additions & 1 deletion
@@ -76,6 +76,7 @@ func NewAutoscaler(opts coreoptions.AutoscalerOptions, informerFactory informers
 		opts.DeleteOptions,
 		opts.DrainabilityRules,
 		opts.DraProvider,
+		opts.CsiProvider,
 	), nil
 }
 
@@ -98,7 +99,7 @@ func initializeDefaultOptions(opts *coreoptions.AutoscalerOptions, informerFacto
 		opts.FrameworkHandle = fwHandle
 	}
 	if opts.ClusterSnapshot == nil {
-		opts.ClusterSnapshot = predicate.NewPredicateSnapshot(store.NewBasicSnapshotStore(), opts.FrameworkHandle, opts.DynamicResourceAllocationEnabled)
+		opts.ClusterSnapshot = predicate.NewPredicateSnapshot(store.NewBasicSnapshotStore(), opts.FrameworkHandle, opts.DynamicResourceAllocationEnabled, opts.CSINodeAwareSchedulingEnabled)
 	}
 	if opts.RemainingPdbTracker == nil {
 		opts.RemainingPdbTracker = pdb.NewBasicRemainingPdbTracker()

cluster-autoscaler/core/options/autoscaler.go

Lines changed: 2 additions & 0 deletions
@@ -28,6 +28,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/observers/loopstart"
 	ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
+	csinodeprovider "k8s.io/autoscaler/cluster-autoscaler/simulator/csi/provider"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules"
 	draprovider "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/provider"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
@@ -57,4 +58,5 @@ type AutoscalerOptions struct {
 	DeleteOptions     options.NodeDeleteOptions
 	DrainabilityRules rules.Rules
 	DraProvider       *draprovider.Provider
+	CsiProvider       *csinodeprovider.Provider
 }

cluster-autoscaler/core/scaledown/actuation/actuator.go

Lines changed: 11 additions & 2 deletions
@@ -36,6 +36,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store"
+	csisnapshot "k8s.io/autoscaler/cluster-autoscaler/simulator/csi/snapshot"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules"
 	drasnapshot "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/snapshot"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/options"
@@ -397,7 +398,7 @@ func (a *Actuator) taintNode(node *apiv1.Node) error {
 }
 
 func (a *Actuator) createSnapshot(nodes []*apiv1.Node) (clustersnapshot.ClusterSnapshot, error) {
-	snapshot := predicate.NewPredicateSnapshot(store.NewBasicSnapshotStore(), a.autoscalingCtx.FrameworkHandle, a.autoscalingCtx.DynamicResourceAllocationEnabled)
+	snapshot := predicate.NewPredicateSnapshot(store.NewBasicSnapshotStore(), a.autoscalingCtx.FrameworkHandle, a.autoscalingCtx.DynamicResourceAllocationEnabled, a.autoscalingCtx.CSINodeAwareSchedulingEnabled)
 	pods, err := a.autoscalingCtx.AllPodLister().List()
 	if err != nil {
 		return nil, err
@@ -414,7 +415,15 @@ func (a *Actuator) createSnapshot(nodes []*apiv1.Node) (clustersnapshot.ClusterS
 		}
 	}
 
-	err = snapshot.SetClusterState(nodes, nonExpendableScheduledPods, draSnapshot)
+	var csiSnapshot *csisnapshot.Snapshot
+	if a.autoscalingCtx.CSINodeAwareSchedulingEnabled {
+		csiSnapshot, err = a.autoscalingCtx.CsiProvider.Snapshot()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	err = snapshot.SetClusterState(nodes, nonExpendableScheduledPods, draSnapshot, csiSnapshot)
 	if err != nil {
 		return nil, err
 	}
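The corresponding change to the ClusterSnapshot interface lives in another of the 24 changed files and is not rendered on this page. Judging from the call sites above and in static_autoscaler.go, the updated method presumably looks roughly like the sketch below; the exact parameter types are assumptions inferred from how the arguments are built, not copied from the commit.

// Hypothetical signature sketch, inferred from the call sites in this commit.
type ClusterSnapshot interface {
	// ...other methods unchanged...

	// SetClusterState resets the snapshot to the given cluster state. A nil
	// csiSnapshot presumably means CSI-node-aware scheduling is disabled for this run.
	SetClusterState(nodes []*apiv1.Node, scheduledPods []*apiv1.Pod,
		draSnapshot *drasnapshot.Snapshot, csiSnapshot *csisnapshot.Snapshot) error
}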

cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go

Lines changed: 6 additions & 1 deletion
@@ -107,7 +107,7 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 	if aErr != nil {
 		return status.UpdateScaleUpError(&status.ScaleUpStatus{}, aErr.AddPrefix("could not get upcoming nodes: "))
 	}
-	klog.V(4).Infof("Upcoming %d nodes", len(upcomingNodes))
+	klog.V(4).Infof("hemant Upcoming %d nodes", len(upcomingNodes))
 
 	nodeGroups := o.autoscalingCtx.CloudProvider.NodeGroups()
 	if o.processors != nil && o.processors.NodeGroupListProcessor != nil {
@@ -135,11 +135,14 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 	for nodegroupID := range skippedNodeGroups {
 		o.processors.BinpackingLimiter.MarkProcessed(o.autoscalingCtx, nodegroupID)
 	}
+	klog.V(4).Infof("hemant validNodeGroups %d", len(validNodeGroups))
 
 	// Calculate expansion options
 	schedulablePodGroups := map[string][]estimator.PodEquivalenceGroup{}
 	var options []expander.Option
 
+	// This code here runs a simulation to see which pods can be scheduled on which node groups.
+	// TODO: Fix bug with CSI node not being added to the simulation.
 	for _, nodeGroup := range validNodeGroups {
 		schedulablePodGroups[nodeGroup.Id()] = o.SchedulablePodGroups(podEquivalenceGroups, nodeGroup, nodeInfos[nodeGroup.Id()])
 	}
@@ -150,6 +153,7 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 
 		if len(option.Pods) == 0 || option.NodeCount == 0 {
 			klog.V(4).Infof("No pod can fit to %s", nodeGroup.Id())
+			klog.Infof("hemant no pod can fit to %s", nodeGroup.Id())
 		} else if allOrNothing && len(option.Pods) < len(unschedulablePods) {
 			klog.V(4).Infof("Some pods can't fit to %s, giving up due to all-or-nothing scale-up strategy", nodeGroup.Id())
 		} else {
@@ -486,6 +490,7 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
 		o.autoscalingCtx.ClusterSnapshot,
 		estimator.NewEstimationContext(o.autoscalingCtx.MaxNodesTotal, option.SimilarNodeGroups, currentNodeCount),
 	)
+	klog.Infof("hemant about to run estimater for node group %s", nodeGroup.Id())
 	option.NodeCount, option.Pods = expansionEstimator.Estimate(podGroups, nodeInfo, nodeGroup)
 	metrics.UpdateDurationFromStart(metrics.Estimate, estimateStart)
 
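Regarding the TODO added above: when the estimator binpacks pods onto template nodes for a node group, those simulated nodes have no corresponding CSINode object yet, so volume-limit checks cannot see the driver's allocatable count. One possible direction, sketched here purely as an illustration and not taken from this commit, would be to fabricate a CSINode for each simulated template node; the helper name, its placement, and how it would be hooked into NodeInfo creation are assumptions, while the storage.k8s.io/v1 CSINode type itself is standard Kubernetes API.

// Illustrative sketch only: fabricate a CSINode for a simulated template node
// so volume-limit checks have something to count against.
package orchestrator // hypothetical placement

import (
	storagev1 "k8s.io/api/storage/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// buildTemplateCSINode is a hypothetical helper; driverName and volumeLimit
// would have to come from the node group's template or the cloud provider.
func buildTemplateCSINode(nodeName, driverName string, volumeLimit int32) *storagev1.CSINode {
	return &storagev1.CSINode{
		ObjectMeta: metav1.ObjectMeta{Name: nodeName}, // a CSINode shares its node's name
		Spec: storagev1.CSINodeSpec{
			Drivers: []storagev1.CSINodeDriver{{
				Name:        driverName,
				NodeID:      nodeName,
				Allocatable: &storagev1.VolumeNodeResources{Count: &volumeLimit},
			}},
		},
	}
}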

cluster-autoscaler/core/static_autoscaler.go

Lines changed: 16 additions & 3 deletions
@@ -45,6 +45,8 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/processors/status"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
+	csinodeprovider "k8s.io/autoscaler/cluster-autoscaler/simulator/csi/provider"
+	csisnapshot "k8s.io/autoscaler/cluster-autoscaler/simulator/csi/snapshot"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules"
 	draprovider "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/provider"
 	drasnapshot "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/snapshot"
@@ -141,7 +143,8 @@ func NewStaticAutoscaler(
 	scaleUpOrchestrator scaleup.Orchestrator,
 	deleteOptions options.NodeDeleteOptions,
 	drainabilityRules rules.Rules,
-	draProvider *draprovider.Provider) *StaticAutoscaler {
+	draProvider *draprovider.Provider,
+	csiProvider *csinodeprovider.Provider) *StaticAutoscaler {
 
 	klog.V(4).Infof("Creating new static autoscaler with opts: %v", opts)
 
@@ -162,7 +165,8 @@ func NewStaticAutoscaler(
 		debuggingSnapshotter,
 		remainingPdbTracker,
 		clusterStateRegistry,
-		draProvider)
+		draProvider,
+		csiProvider)
 
 	taintConfig := taints.NewTaintConfig(opts)
 	processors.ScaleDownCandidatesNotifier.Register(clusterStateRegistry)
@@ -280,6 +284,15 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
 		}
 	}
 
+	var csiSnapshot *csisnapshot.Snapshot
+	if a.AutoscalingContext.CsiProvider != nil {
+		var err error
+		csiSnapshot, err = a.AutoscalingContext.CsiProvider.Snapshot()
+		if err != nil {
+			return caerrors.ToAutoscalerError(caerrors.ApiCallError, err)
+		}
+	}
+
 	// Get nodes and pods currently living on cluster
 	allNodes, readyNodes, typedErr := a.obtainNodeLists(draSnapshot)
 	if typedErr != nil {
@@ -340,7 +353,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
 	}
 	nonExpendableScheduledPods := core_utils.FilterOutExpendablePods(originalScheduledPods, a.ExpendablePodsPriorityCutoff)
 
-	if err := a.ClusterSnapshot.SetClusterState(allNodes, nonExpendableScheduledPods, draSnapshot); err != nil {
+	if err := a.ClusterSnapshot.SetClusterState(allNodes, nonExpendableScheduledPods, draSnapshot, csiSnapshot); err != nil {
 		return caerrors.ToAutoscalerError(caerrors.InternalError, err).AddPrefix("failed to initialize ClusterSnapshot: ")
 	}
 	// Initialize Pod Disruption Budget tracking
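How CsiProvider is constructed and handed to NewStaticAutoscaler is part of bootstrap code that is not rendered on this page. Since RunOnce guards on CsiProvider being non-nil, the wiring presumably builds the provider only when the new flag is set, roughly along the lines of the sketch below; the informer-based construction and the NewProvider name are assumptions modeled on how similar providers are typically wired, not copied from this commit.

// Hypothetical wiring sketch -- not taken from this commit.
func buildCsiProvider(opts config.AutoscalingOptions, informerFactory informers.SharedInformerFactory) *csinodeprovider.Provider {
	if !opts.CSINodeAwareSchedulingEnabled {
		// Leaving the provider nil keeps the feature inert; RunOnce above
		// only snapshots CSINodes when CsiProvider is non-nil.
		return nil
	}
	// Assumed constructor; the CSINode lister comes from the shared informer factory.
	return csinodeprovider.NewProvider(informerFactory.Storage().V1().CSINodes().Lister())
}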
