Skip to content

Commit 90eabc6

Browse files
committed
Differentiate provisioning requests using Parameters field. Keep prefixing as not recommended approach
1 parent 9cac6a4 commit 90eabc6

File tree

13 files changed

+263
-95
lines changed

13 files changed

+263
-95
lines changed

cluster-autoscaler/FAQ.md

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -630,9 +630,17 @@ When using this class, Cluster Autoscaler performs following actions:
630630
Adds a BookingExpired=True condition when the 10-minute reservation period expires.
631631

632632
Since Cluster Autoscaler version 1.33, it is possible to configure the autoscaler
633-
to process only those check capacity ProvisioningRequests, that have a prefix matching the `--check-capacity-provisioning-class-prefix=<prefix>` flag.
634-
This allows to run two Cluster Autoscalers in the cluster, but instance with the configured prefix
633+
to process only a subset of check capacity ProvisioningRequests and ignore the rest.
634+
This should be done with caution, by specifying the `--check-capacity-processor-instance=<name>` flag.
635+
Then, the ProvisioningRequest's Parameters map should contain the key "processorInstance" with a value equal to the configured instance name.
636+
637+
This allows running two Cluster Autoscalers in the cluster, but the second instance (likely the one with the configured instance name)
635638
**should only** handle check capacity ProvisioningRequests and not overlap node groups with the main instance.
639+
It is the responsibility of the user to ensure that the capacity checks do not overlap.
640+
Best-effort atomic ProvisioningRequests processing is disabled in the instance that has this flag set.
641+
642+
For backwards compatibility, it is possible to differentiate ProvisioningRequests by prefixing their provisioningClassName with the instance name,
643+
but it is **not recommended** and will be removed in CA 1.35.
636644

637645
* `best-effort-atomic-scale-up.autoscaling.x-k8s.io` (supported from Cluster Autoscaler version 1.30.2 or later).
638646
When using this class, Cluster Autoscaler performs following actions:
@@ -978,7 +986,7 @@ The following startup parameters are supported for cluster autoscaler:
978986
| `bulk-mig-instances-listing-enabled` | Fetch GCE mig instances in bulk instead of per mig | |
979987
| `bypassed-scheduler-names` | Names of schedulers to bypass. If set to non-empty value, CA will not wait for pods to reach a certain age before triggering a scale-up. | |
980988
| `check-capacity-batch-processing` | Whether to enable batch processing for check capacity requests. | |
981-
| `check-capacity-provisioning-class-prefix` | Prefix of provisioningClassName that will be filtered by processors. Only ProvisioningRequests with this prefix in their class will be processed by this CA. It refers only to check capacity ProvisioningRequests. | |
989+
| `check-capacity-processor-instance` | Name of the processor instance. Only ProvisioningRequests that define this name in their parameters with the key "processorInstance" will be processed by this CA instance. It only refers to check capacity ProvisioningRequests, but if not empty, best-effort atomic ProvisioningRequests processing is disabled in this instance. Not recommended: Until CA 1.35, ProvisioningRequests with this name as prefix in their class will be also processed. | |
982990
| `check-capacity-provisioning-request-batch-timebox` | Maximum time to process a batch of provisioning requests. | 10s |
983991
| `check-capacity-provisioning-request-max-batch-size` | Maximum number of provisioning requests to process in a single batch. | 10 |
984992
| `cloud-config` | The path to the cloud provider configuration file. Empty string for no configuration file. | |
@@ -1022,7 +1030,13 @@ The following startup parameters are supported for cluster autoscaler:
10221030
| `kube-client-qps` | QPS value for kubernetes client. | 5 |
10231031
| `kubeconfig` | Path to kubeconfig file with authorization and master location information. | |
10241032
| `kubernetes` | Kubernetes master location. Leave blank for default | |
1025-
| `lease-resource-name` | The lease resource to use in leader election. | "cluster-autoscaler" |
1033+
| `leader-elect` | Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability. | true |
1034+
| `leader-elect-lease-duration` | The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. | 15s |
1035+
| `leader-elect-renew-deadline` | The interval between attempts by the acting master to renew a leadership slot before it stops leading. This must be less than the lease duration. This is only applicable if leader election is enabled. | 10s |
1036+
| `leader-elect-resource-lock` | The type of resource object that is used for locking during leader election. Supported options are 'leases'. | "leases" |
1037+
| `leader-elect-resource-name` | The name of resource object that is used for locking during leader election. | "cluster-autoscaler" |
1038+
| `leader-elect-resource-namespace` | The namespace of resource object that is used for locking during leader election. | |
1039+
| `leader-elect-retry-period` | The duration the clients should wait between attempting acquisition and renewal of a leadership. This is only applicable if leader election is enabled. | 2s |
10261040
| `log-backtrace-at` | when logging hits line file:N, emit a stack trace | :0 |
10271041
| `log-dir` | If non-empty, write log files in this directory (no effect when -logtostderr=true) | |
10281042
| `log-file` | If non-empty, use this log file (no effect when -logtostderr=true) | |

cluster-autoscaler/config/autoscaling_options.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -313,10 +313,11 @@ type AutoscalingOptions struct {
313313
DynamicResourceAllocationEnabled bool
314314
// ClusterSnapshotParallelism is the maximum parallelism of cluster snapshot creation.
315315
ClusterSnapshotParallelism int
316-
// CheckCapacityProvisioningClassPrefix is the prefix of provisioningClassName that will be filtered by processors.
317-
// Only ProvisioningRequests with this prefix in their class will be processed by this CA.
318-
// It only refers to check capacity ProvisioningRequests.
319-
CheckCapacityProvisioningClassPrefix string
316+
// CheckCapacityProcessorInstance is the name of the processor instance.
317+
// Only ProvisioningRequests that define this name in their parameters with the key "processorInstance" will be processed by this CA instance.
318+
// It only refers to check capacity ProvisioningRequests, but if not empty, best-effort atomic ProvisioningRequests processing is disabled in this instance.
319+
// Not recommended: Until CA 1.35, ProvisioningRequests with this name as prefix in their class will be also processed.
320+
CheckCapacityProcessorInstance string
320321
}
321322

322323
// KubeClientOptions specify options for kube client

cluster-autoscaler/main.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ var (
283283
forceDeleteLongUnregisteredNodes = flag.Bool("force-delete-unregistered-nodes", false, "Whether to enable force deletion of long unregistered nodes, regardless of the min size of the node group the belong to.")
284284
enableDynamicResourceAllocation = flag.Bool("enable-dynamic-resource-allocation", false, "Whether logic for handling DRA (Dynamic Resource Allocation) objects is enabled.")
285285
clusterSnapshotParallelism = flag.Int("cluster-snapshot-parallelism", 16, "Maximum parallelism of cluster snapshot creation.")
286-
checkCapacityProvisioningClassPrefix = flag.String("check-capacity-provisioning-class-prefix", "", "Prefix of provisioningClassName that will be filtered by processors. Only ProvisioningRequests with this prefix in their class will be processed by this CA. It refers only to check capacity ProvisioningRequests.")
286+
checkCapacityProcessorInstance = flag.String("check-capacity-processor-instance", "", "Name of the processor instance. Only ProvisioningRequests that define this name in their parameters with the key \"processorInstance\" will be processed by this CA instance. It only refers to check capacity ProvisioningRequests, but if not empty, best-effort atomic ProvisioningRequests processing is disabled in this instance. Not recommended: Until CA 1.35, ProvisioningRequests with this name as prefix in their class will be also processed.")
287287
)
288288

289289
func isFlagPassed(name string) bool {
@@ -465,7 +465,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
465465
ForceDeleteLongUnregisteredNodes: *forceDeleteLongUnregisteredNodes,
466466
DynamicResourceAllocationEnabled: *enableDynamicResourceAllocation,
467467
ClusterSnapshotParallelism: *clusterSnapshotParallelism,
468-
CheckCapacityProvisioningClassPrefix: *checkCapacityProvisioningClassPrefix,
468+
CheckCapacityProcessorInstance: *checkCapacityProcessorInstance,
469469
}
470470
}
471471

@@ -541,7 +541,7 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot
541541
return nil, nil, err
542542
}
543543

544-
ProvisioningRequestInjector, err = provreq.NewProvisioningRequestPodsInjector(restConfig, opts.ProvisioningRequestInitialBackoffTime, opts.ProvisioningRequestMaxBackoffTime, opts.ProvisioningRequestMaxBackoffCacheSize, opts.CheckCapacityBatchProcessing, opts.CheckCapacityProvisioningClassPrefix)
544+
ProvisioningRequestInjector, err = provreq.NewProvisioningRequestPodsInjector(restConfig, opts.ProvisioningRequestInitialBackoffTime, opts.ProvisioningRequestMaxBackoffTime, opts.ProvisioningRequestMaxBackoffCacheSize, opts.CheckCapacityBatchProcessing, opts.CheckCapacityProcessorInstance)
545545
if err != nil {
546546
return nil, nil, err
547547
}
@@ -560,7 +560,7 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot
560560

561561
scaleUpOrchestrator := provreqorchestrator.NewWrapperOrchestrator(provreqOrchestrator)
562562
opts.ScaleUpOrchestrator = scaleUpOrchestrator
563-
provreqProcesor := provreq.NewProvReqProcessor(client, opts.CheckCapacityProvisioningClassPrefix)
563+
provreqProcesor := provreq.NewProvReqProcessor(client, opts.CheckCapacityProcessorInstance)
564564
opts.LoopStartNotifier = loopstart.NewObserversList([]loopstart.Observer{provreqProcesor})
565565

566566
podListProcessor.AddProcessor(provreqProcesor)

cluster-autoscaler/processors/provreq/injector.go

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@ import (
3737

3838
// ProvisioningRequestPodsInjector creates in-memory pods from a ProvisioningRequest and injects them into the unscheduled pods list.
3939
type ProvisioningRequestPodsInjector struct {
40-
initialRetryTime time.Duration
41-
maxBackoffTime time.Duration
42-
backoffDuration *lru.Cache
43-
clock clock.PassiveClock
44-
client *provreqclient.ProvisioningRequestClient
45-
lastProvisioningRequestProcessTime time.Time
46-
checkCapacityBatchProcessing bool
47-
checkCapacityProvisioningClassPrefix string
40+
initialRetryTime time.Duration
41+
maxBackoffTime time.Duration
42+
backoffDuration *lru.Cache
43+
clock clock.PassiveClock
44+
client *provreqclient.ProvisioningRequestClient
45+
lastProvisioningRequestProcessTime time.Time
46+
checkCapacityBatchProcessing bool
47+
checkCapacityProcessorInstance string
4848
}
4949

5050
// IsAvailableForProvisioning checks if the provisioning request is in the correct state for processing and provisioning has not been attempted recently.
@@ -95,13 +95,17 @@ func (p *ProvisioningRequestPodsInjector) MarkAsFailed(pr *provreqwrapper.Provis
9595
}
9696

9797
func (p *ProvisioningRequestPodsInjector) isSupportedClass(pr *provreqwrapper.ProvisioningRequest) bool {
98-
return provisioningrequest.SupportedProvisioningClass(pr.Spec.ProvisioningClassName, p.checkCapacityProvisioningClassPrefix)
98+
return provisioningrequest.SupportedProvisioningClass(pr.ProvisioningRequest, p.checkCapacityProcessorInstance)
99+
}
100+
101+
func (p *ProvisioningRequestPodsInjector) isSupportedCheckCapacityClass(pr *provreqwrapper.ProvisioningRequest) bool {
102+
return provisioningrequest.SupportedCheckCapacityClass(pr.ProvisioningRequest, p.checkCapacityProcessorInstance)
99103
}
100104

101105
func (p *ProvisioningRequestPodsInjector) shouldMarkAsAccepted(pr *provreqwrapper.ProvisioningRequest) bool {
102106
// Don't mark as accepted the check capacity ProvReq when batch processing is enabled.
103107
// It will be marked later, in parallel, during processing the requests.
104-
return !p.checkCapacityBatchProcessing || !p.matchesCheckCapacityClass(pr.Spec.ProvisioningClassName)
108+
return !p.checkCapacityBatchProcessing || !p.isSupportedCheckCapacityClass(pr)
105109
}
106110

107111
// GetPodsFromNextRequest picks one ProvisioningRequest meeting the condition passed using isSupportedClass function, marks it as accepted and returns pods from it.
@@ -145,10 +149,6 @@ type ProvisioningRequestWithPods struct {
145149
Pods []*apiv1.Pod
146150
}
147151

148-
func (p *ProvisioningRequestPodsInjector) matchesCheckCapacityClass(provisioningClassName string) bool {
149-
return provisioningClassName == p.checkCapacityProvisioningClassPrefix+v1.ProvisioningClassCheckCapacity
150-
}
151-
152152
// GetCheckCapacityBatch returns up to the requested number of ProvisioningRequestWithPods.
153153
// We do not mark the PRs as accepted here.
154154
// If we fail to get the pods for a PR, we mark the PR as failed and issue an update.
@@ -162,7 +162,7 @@ func (p *ProvisioningRequestPodsInjector) GetCheckCapacityBatch(maxPrs int) ([]P
162162
if len(prsWithPods) >= maxPrs {
163163
break
164164
}
165-
if !p.matchesCheckCapacityClass(pr.Spec.ProvisioningClassName) {
165+
if !p.isSupportedCheckCapacityClass(pr) {
166166
continue
167167
}
168168
if !p.IsAvailableForProvisioning(pr) {
@@ -197,20 +197,20 @@ func (p *ProvisioningRequestPodsInjector) Process(
197197
func (p *ProvisioningRequestPodsInjector) CleanUp() {}
198198

199199
// NewProvisioningRequestPodsInjector creates a ProvisioningRequest filter processor.
200-
func NewProvisioningRequestPodsInjector(kubeConfig *rest.Config, initialBackoffTime, maxBackoffTime time.Duration, maxCacheSize int, checkCapacityBatchProcessing bool, checkCapacityProvisioningClassPrefix string) (*ProvisioningRequestPodsInjector, error) {
200+
func NewProvisioningRequestPodsInjector(kubeConfig *rest.Config, initialBackoffTime, maxBackoffTime time.Duration, maxCacheSize int, checkCapacityBatchProcessing bool, checkCapacityProcessorInstance string) (*ProvisioningRequestPodsInjector, error) {
201201
client, err := provreqclient.NewProvisioningRequestClient(kubeConfig)
202202
if err != nil {
203203
return nil, err
204204
}
205205
return &ProvisioningRequestPodsInjector{
206-
initialRetryTime: initialBackoffTime,
207-
maxBackoffTime: maxBackoffTime,
208-
backoffDuration: lru.New(maxCacheSize),
209-
client: client,
210-
clock: clock.RealClock{},
211-
lastProvisioningRequestProcessTime: time.Now(),
212-
checkCapacityBatchProcessing: checkCapacityBatchProcessing,
213-
checkCapacityProvisioningClassPrefix: checkCapacityProvisioningClassPrefix,
206+
initialRetryTime: initialBackoffTime,
207+
maxBackoffTime: maxBackoffTime,
208+
backoffDuration: lru.New(maxCacheSize),
209+
client: client,
210+
clock: clock.RealClock{},
211+
lastProvisioningRequestProcessTime: time.Now(),
212+
checkCapacityBatchProcessing: checkCapacityBatchProcessing,
213+
checkCapacityProcessorInstance: checkCapacityProcessorInstance,
214214
}, nil
215215
}
216216

0 commit comments

Comments
 (0)