Skip to content

Commit b7e8857

Browse files
authored
Merge pull request #8662 from norbertcyran/new-resource-limits
[Granular resource limits] Add support for granular resource quotas
2 parents 3ac2b95 + d05d8df commit b7e8857

File tree

10 files changed

+1391
-1
lines changed

10 files changed

+1391
-1
lines changed

cluster-autoscaler/cloudprovider/resource_limiter.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ package cloudprovider
1818

1919
import (
2020
"fmt"
21-
"k8s.io/apimachinery/pkg/util/sets"
2221
"math"
2322
"strings"
23+
24+
apiv1 "k8s.io/api/core/v1"
25+
"k8s.io/apimachinery/pkg/util/sets"
2426
)
2527

2628
// ResourceLimiter contains limits (max, min) for resources (cores, memory etc.).
@@ -29,6 +31,11 @@ type ResourceLimiter struct {
2931
maxLimits map[string]int64
3032
}
3133

34+
// ID returns the identifier of the limiter.
35+
func (r *ResourceLimiter) ID() string {
36+
return "cluster-wide"
37+
}
38+
3239
// NewResourceLimiter creates new ResourceLimiter for map. Maps are deep copied.
3340
func NewResourceLimiter(minLimits map[string]int64, maxLimits map[string]int64) *ResourceLimiter {
3441
minLimitsCopy := make(map[string]int64)
@@ -88,3 +95,18 @@ func (r *ResourceLimiter) String() string {
8895
}
8996
return strings.Join(resourceDetails, ", ")
9097
}
98+
99+
// AppliesTo checks if the limiter applies to node.
//
// As this is a compatibility layer for cluster-wide limits, it always returns
// true regardless of the node passed in.
func (r *ResourceLimiter) AppliesTo(node *apiv1.Node) bool {
	return true
}
105+
106+
// Limits returns max limits of the limiter.
107+
//
108+
// New resource quotas system supports only max limits, therefore only max limits
109+
// are returned here.
110+
func (r *ResourceLimiter) Limits() map[string]int64 {
111+
return r.maxLimits
112+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package resourcequotas
18+
19+
import (
20+
corev1 "k8s.io/api/core/v1"
21+
"k8s.io/autoscaler/cluster-autoscaler/context"
22+
"k8s.io/autoscaler/cluster-autoscaler/processors/customresources"
23+
)
24+
25+
// TrackerFactory builds quota trackers.
type TrackerFactory struct {
	// crp reports custom resources (e.g. GPUs) provided by nodes.
	crp customresources.CustomResourcesProcessor
	// quotasProvider supplies the set of resource quotas to enforce.
	quotasProvider Provider
	// usageCalculator computes per-quota resource usage of existing nodes.
	usageCalculator *usageCalculator
}
31+
32+
// TrackerOptions stores configuration for quota tracking.
type TrackerOptions struct {
	// CustomResourcesProcessor reports custom resources exposed by nodes.
	CustomResourcesProcessor customresources.CustomResourcesProcessor
	// QuotaProvider supplies the quotas that built trackers should enforce.
	QuotaProvider Provider
	// NodeFilter excludes selected nodes from usage tracking.
	NodeFilter NodeFilter
}
38+
39+
// NewTrackerFactory creates a new TrackerFactory.
40+
func NewTrackerFactory(opts TrackerOptions) *TrackerFactory {
41+
uc := newUsageCalculator(opts.CustomResourcesProcessor, opts.NodeFilter)
42+
return &TrackerFactory{
43+
crp: opts.CustomResourcesProcessor,
44+
quotasProvider: opts.QuotaProvider,
45+
usageCalculator: uc,
46+
}
47+
}
48+
49+
// NewQuotasTracker builds a new Tracker.
50+
//
51+
// NewQuotasTracker calculates resources used by the nodes for every
52+
// quota returned by the Provider. Then, based on usages and limits it calculates
53+
// how many resources can be still added to the cluster. Returns a Tracker object.
54+
func (f *TrackerFactory) NewQuotasTracker(autoscalingCtx *context.AutoscalingContext, nodes []*corev1.Node) (*Tracker, error) {
55+
quotas, err := f.quotasProvider.Quotas()
56+
if err != nil {
57+
return nil, err
58+
}
59+
usages, err := f.usageCalculator.calculateUsages(autoscalingCtx, nodes, quotas)
60+
if err != nil {
61+
return nil, err
62+
}
63+
var quotaStatuses []*quotaStatus
64+
for _, rq := range quotas {
65+
limitsLeft := make(resourceList)
66+
limits := rq.Limits()
67+
for resourceType, limit := range limits {
68+
usage := usages[rq.ID()][resourceType]
69+
limitsLeft[resourceType] = max(0, limit-usage)
70+
}
71+
quotaStatuses = append(quotaStatuses, &quotaStatus{
72+
quota: rq,
73+
limitsLeft: limitsLeft,
74+
})
75+
}
76+
tracker := newTracker(f.crp, quotaStatuses)
77+
return tracker, nil
78+
}
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package resourcequotas
18+
19+
import (
20+
"testing"
21+
22+
"github.com/google/go-cmp/cmp"
23+
"github.com/google/go-cmp/cmp/cmpopts"
24+
apiv1 "k8s.io/api/core/v1"
25+
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
26+
cptest "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
27+
"k8s.io/autoscaler/cluster-autoscaler/context"
28+
"k8s.io/autoscaler/cluster-autoscaler/processors/customresources"
29+
"k8s.io/autoscaler/cluster-autoscaler/utils/test"
30+
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
31+
)
32+
33+
// nodeExcludeFn adapts a plain function to the NodeFilter interface for tests.
type nodeExcludeFn func(node *apiv1.Node) bool

// ExcludeFromTracking reports whether node should be skipped when calculating
// quota usage; it simply delegates to the wrapped function.
func (n nodeExcludeFn) ExcludeFromTracking(node *apiv1.Node) bool {
	return n(node)
}
38+
39+
func TestNewQuotasTracker(t *testing.T) {
40+
testCases := []struct {
41+
name string
42+
crp customresources.CustomResourcesProcessor
43+
nodeFilter NodeFilter
44+
nodes []*apiv1.Node
45+
limits map[string]int64
46+
newNode *apiv1.Node
47+
nodeDelta int
48+
wantResult *CheckDeltaResult
49+
}{
50+
{
51+
name: "default config allowed operation",
52+
nodes: []*apiv1.Node{
53+
test.BuildTestNode("n1", 1000, 2*units.GiB),
54+
test.BuildTestNode("n2", 2000, 4*units.GiB),
55+
test.BuildTestNode("n3", 3000, 8*units.GiB),
56+
},
57+
limits: map[string]int64{
58+
"cpu": 12,
59+
"memory": 32 * units.GiB,
60+
},
61+
newNode: test.BuildTestNode("n4", 2000, 4*units.GiB),
62+
nodeDelta: 2,
63+
wantResult: &CheckDeltaResult{
64+
AllowedDelta: 2,
65+
},
66+
},
67+
{
68+
name: "default config exceeded operation",
69+
nodes: []*apiv1.Node{
70+
test.BuildTestNode("n1", 1000, 2*units.GiB),
71+
test.BuildTestNode("n2", 2000, 4*units.GiB),
72+
test.BuildTestNode("n3", 3000, 8*units.GiB),
73+
},
74+
limits: map[string]int64{
75+
"cpu": 6,
76+
"memory": 16 * units.GiB,
77+
},
78+
newNode: test.BuildTestNode("n4", 2000, 4*units.GiB),
79+
nodeDelta: 2,
80+
wantResult: &CheckDeltaResult{
81+
AllowedDelta: 0,
82+
ExceededQuotas: []ExceededQuota{
83+
{ID: "cluster-wide", ExceededResources: []string{"cpu", "memory"}},
84+
},
85+
},
86+
},
87+
{
88+
name: "default config partially allowed operation",
89+
nodes: []*apiv1.Node{
90+
test.BuildTestNode("n1", 1000, 2*units.GiB),
91+
test.BuildTestNode("n2", 2000, 4*units.GiB),
92+
test.BuildTestNode("n3", 3000, 8*units.GiB),
93+
},
94+
limits: map[string]int64{
95+
"cpu": 7,
96+
"memory": 16 * units.GiB,
97+
},
98+
newNode: test.BuildTestNode("n4", 2000, 4*units.GiB),
99+
nodeDelta: 2,
100+
wantResult: &CheckDeltaResult{
101+
AllowedDelta: 0,
102+
ExceededQuotas: []ExceededQuota{
103+
{ID: "cluster-wide", ExceededResources: []string{"cpu", "memory"}},
104+
},
105+
},
106+
},
107+
{
108+
name: "custom resource config allowed operation",
109+
crp: &fakeCustomResourcesProcessor{
110+
NodeResourceTargets: func(n *apiv1.Node) []customresources.CustomResourceTarget {
111+
if n.Name == "n1" {
112+
return []customresources.CustomResourceTarget{
113+
{
114+
ResourceType: "gpu",
115+
ResourceCount: 1,
116+
},
117+
}
118+
}
119+
return nil
120+
},
121+
},
122+
nodes: []*apiv1.Node{
123+
test.BuildTestNode("n1", 1000, 2*units.GiB),
124+
test.BuildTestNode("n2", 2000, 4*units.GiB),
125+
test.BuildTestNode("n3", 3000, 8*units.GiB),
126+
},
127+
limits: map[string]int64{
128+
"cpu": 12,
129+
"memory": 32 * units.GiB,
130+
"gpu": 6,
131+
},
132+
newNode: test.BuildTestNode("n4", 2000, 4*units.GiB),
133+
nodeDelta: 2,
134+
wantResult: &CheckDeltaResult{
135+
AllowedDelta: 2,
136+
},
137+
},
138+
{
139+
name: "custom resource config exceeded operation",
140+
crp: &fakeCustomResourcesProcessor{
141+
NodeResourceTargets: func(n *apiv1.Node) []customresources.CustomResourceTarget {
142+
if n.Name == "n1" || n.Name == "n4" {
143+
return []customresources.CustomResourceTarget{
144+
{
145+
ResourceType: "gpu",
146+
ResourceCount: 1,
147+
},
148+
}
149+
}
150+
return nil
151+
},
152+
},
153+
nodes: []*apiv1.Node{
154+
test.BuildTestNode("n1", 1000, 2*units.GiB),
155+
test.BuildTestNode("n2", 2000, 4*units.GiB),
156+
test.BuildTestNode("n3", 3000, 8*units.GiB),
157+
},
158+
limits: map[string]int64{
159+
"cpu": 12,
160+
"memory": 32 * units.GiB,
161+
"gpu": 1,
162+
},
163+
newNode: test.BuildTestNode("n4", 2000, 4*units.GiB),
164+
nodeDelta: 2,
165+
wantResult: &CheckDeltaResult{
166+
AllowedDelta: 0,
167+
ExceededQuotas: []ExceededQuota{
168+
{ID: "cluster-wide", ExceededResources: []string{"gpu"}},
169+
},
170+
},
171+
},
172+
{
173+
name: "node filter config allowed operation",
174+
nodeFilter: nodeExcludeFn(func(node *apiv1.Node) bool {
175+
return node.Name == "n3"
176+
}),
177+
nodes: []*apiv1.Node{
178+
test.BuildTestNode("n1", 1000, 2*units.GiB),
179+
test.BuildTestNode("n2", 2000, 4*units.GiB),
180+
test.BuildTestNode("n3", 3000, 8*units.GiB),
181+
},
182+
limits: map[string]int64{
183+
"cpu": 4,
184+
"memory": 8 * units.GiB,
185+
},
186+
newNode: test.BuildTestNode("n4", 1000, 2*units.GiB),
187+
nodeDelta: 1,
188+
wantResult: &CheckDeltaResult{
189+
AllowedDelta: 1,
190+
},
191+
},
192+
{
193+
name: "node filter config exceeded operation",
194+
nodeFilter: nodeExcludeFn(func(node *apiv1.Node) bool {
195+
return node.Name == "n3"
196+
}),
197+
nodes: []*apiv1.Node{
198+
test.BuildTestNode("n1", 1000, 2*units.GiB),
199+
test.BuildTestNode("n2", 2000, 4*units.GiB),
200+
test.BuildTestNode("n3", 3000, 8*units.GiB),
201+
},
202+
limits: map[string]int64{
203+
"cpu": 4,
204+
"memory": 8 * units.GiB,
205+
},
206+
newNode: test.BuildTestNode("n4", 2000, 4*units.GiB),
207+
nodeDelta: 1,
208+
wantResult: &CheckDeltaResult{
209+
AllowedDelta: 0,
210+
ExceededQuotas: []ExceededQuota{
211+
{ID: "cluster-wide", ExceededResources: []string{"cpu", "memory"}},
212+
},
213+
},
214+
},
215+
}
216+
for _, tc := range testCases {
217+
t.Run(tc.name, func(t *testing.T) {
218+
cloudProvider := cptest.NewTestCloudProviderBuilder().Build()
219+
resourceLimiter := cloudprovider.NewResourceLimiter(nil, tc.limits)
220+
cloudProvider.SetResourceLimiter(resourceLimiter)
221+
ctx := &context.AutoscalingContext{CloudProvider: cloudProvider}
222+
crp := tc.crp
223+
if crp == nil {
224+
crp = &fakeCustomResourcesProcessor{}
225+
}
226+
factory := NewTrackerFactory(TrackerOptions{
227+
CustomResourcesProcessor: crp,
228+
QuotaProvider: NewCloudQuotasProvider(cloudProvider),
229+
NodeFilter: tc.nodeFilter,
230+
})
231+
tracker, err := factory.NewQuotasTracker(ctx, tc.nodes)
232+
if err != nil {
233+
t.Errorf("failed to create tracker: %v", err)
234+
}
235+
var ng cloudprovider.NodeGroup
236+
result, err := tracker.CheckDelta(ctx, ng, tc.newNode, tc.nodeDelta)
237+
if err != nil {
238+
t.Errorf("failed to check delta: %v", err)
239+
}
240+
opts := []cmp.Option{
241+
cmpopts.SortSlices(func(a, b string) bool { return a < b }),
242+
cmpopts.EquateEmpty(),
243+
}
244+
if diff := cmp.Diff(tc.wantResult, result, opts...); diff != "" {
245+
t.Errorf("CheckDelta() mismatch (-want +got):\n%s", diff)
246+
}
247+
})
248+
}
249+
}

0 commit comments

Comments
 (0)