Skip to content

Commit 9f2e149

Browse files
committed
feat: add MaxNodeUtilizationPercent in ReclaimedResourceConfiguration
1 parent 29d4745 commit 9f2e149

File tree

10 files changed

+225
-11
lines changed

10 files changed

+225
-11
lines changed

cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ type ReclaimedResourceOptions struct {
3434
MinReclaimedResourceForReport general.ResourceList
3535
ReservedResourceForAllocate general.ResourceList
3636
ReservedResourceForReclaimedCores general.ResourceList
37+
MaxNodeUtilizationPercent map[string]int64
3738

3839
*cpuheadroom.CPUHeadroomOptions
3940
*memoryheadroom.MemoryHeadroomOptions
@@ -58,8 +59,9 @@ func NewReclaimedResourceOptions() *ReclaimedResourceOptions {
5859
v1.ResourceCPU: resource.MustParse("4"),
5960
v1.ResourceMemory: resource.MustParse("0"),
6061
},
61-
CPUHeadroomOptions: cpuheadroom.NewCPUHeadroomOptions(),
62-
MemoryHeadroomOptions: memoryheadroom.NewMemoryHeadroomOptions(),
62+
MaxNodeUtilizationPercent: map[string]int64{},
63+
CPUHeadroomOptions: cpuheadroom.NewCPUHeadroomOptions(),
64+
MemoryHeadroomOptions: memoryheadroom.NewMemoryHeadroomOptions(),
6365
}
6466
}
6567

@@ -77,6 +79,8 @@ func (o *ReclaimedResourceOptions) AddFlags(fss *cliflag.NamedFlagSets) {
7779
"reserved reclaimed resource actually not allocate to reclaimed resource")
7880
fs.Var(&o.ReservedResourceForReclaimedCores, "reserved-resource-for-reclaimed-cores",
7981
"reserved resources for reclaimed_cores pods")
82+
fs.StringToInt64Var(&o.MaxNodeUtilizationPercent, "max-node-utilization-percent", o.MaxNodeUtilizationPercent,
83+
"node utilization resource limit for reclaimed pool")
8084

8185
o.CPUHeadroomOptions.AddFlags(fss)
8286
o.MemoryHeadroomOptions.AddFlags(fss)
@@ -91,6 +95,12 @@ func (o *ReclaimedResourceOptions) ApplyTo(c *reclaimedresource.ReclaimedResourc
9195
c.ReservedResourceForAllocate = v1.ResourceList(o.ReservedResourceForAllocate)
9296
c.MinReclaimedResourceForAllocate = v1.ResourceList(o.ReservedResourceForReclaimedCores)
9397

98+
maxNodeUtilizationPercent := make(map[v1.ResourceName]int64)
99+
for resourceName, value := range o.MaxNodeUtilizationPercent {
100+
maxNodeUtilizationPercent[v1.ResourceName(resourceName)] = value
101+
}
102+
c.MaxNodeUtilizationPercent = maxNodeUtilizationPercent
103+
94104
errList = append(errList, o.CPUHeadroomOptions.ApplyTo(c.CPUHeadroomConfiguration))
95105
errList = append(errList, o.MemoryHeadroomOptions.ApplyTo(c.MemoryHeadroomConfiguration))
96106
return errors.NewAggregate(errList)

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ require (
161161
)
162162

163163
replace (
164+
github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240718113528-f7732f541adb
164165
k8s.io/api => k8s.io/api v0.24.6
165166
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6
166167
k8s.io/apimachinery => k8s.io/apimachinery v0.24.6

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWX
8484
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
8585
github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
8686
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
87+
github.com/WangZzzhe/katalyst-api v0.0.0-20240718113528-f7732f541adb h1:e0Ucv0uoKUs0DRdbIaatoT+CebRydUPJCvL9HExzTdw=
88+
github.com/WangZzzhe/katalyst-api v0.0.0-20240718113528-f7732f541adb/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
8789
github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c=
8890
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
8991
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
@@ -568,8 +570,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
568570
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
569571
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
570572
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
571-
github.com/kubewharf/katalyst-api v0.5.1-0.20240702044746-be552fd7ea7d h1:6CuK3axf2B63zIkEu5XyxbaC+JArE/3Jo3QHvb+Hn0M=
572-
github.com/kubewharf/katalyst-api v0.5.1-0.20240702044746-be552fd7ea7d/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
573573
github.com/kubewharf/kubelet v1.24.6-kubewharf.8 h1:2e89T/nZTgzaVhyRsZuwEdRk8V8kJXs4PRkgfeG4Ai4=
574574
github.com/kubewharf/kubelet v1.24.6-kubewharf.8/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
575575
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=

pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"time"
2626

2727
v1 "k8s.io/api/core/v1"
28+
"k8s.io/apimachinery/pkg/api/resource"
2829
"k8s.io/apimachinery/pkg/util/sets"
2930
"k8s.io/apimachinery/pkg/util/wait"
3031

@@ -623,5 +624,8 @@ func (p *CPUPressureLoadEviction) checkPressureWithAdvisedThreshold() bool {
623624
// for now, we consider ReservedResourceForAllocate as downgrading or manual intervention configuration,
624625
// when it's set to a value greater than zero, fall back to static threshold
625626
dynamicConfiguration := p.dynamicConf.GetDynamicConfiguration()
626-
return dynamicConfiguration.EnableReclaim && dynamicConfiguration.ReservedResourceForAllocate.Cpu().Value() == 0
627+
reservedResourceForAllocate := dynamicConfiguration.GetReservedResourceForAllocate(v1.ResourceList{
628+
v1.ResourceCPU: *resource.NewQuantity(int64(p.metaServer.NumCPUs), resource.DecimalSI),
629+
})
630+
return dynamicConfiguration.EnableReclaim && reservedResourceForAllocate.Cpu().Value() == 0
627631
}

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"fmt"
2121

2222
v1 "k8s.io/api/core/v1"
23+
"k8s.io/apimachinery/pkg/api/resource"
2324
"k8s.io/apimachinery/pkg/util/sets"
2425
"k8s.io/klog/v2"
2526
"k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
@@ -168,7 +169,10 @@ func (cra *cpuResourceAdvisor) updateNumasAvailableResource() {
168169
}
169170

170171
func (cra *cpuResourceAdvisor) getNumasReservedForAllocate(numas machine.CPUSet) float64 {
171-
reserved := cra.conf.GetDynamicConfiguration().ReservedResourceForAllocate[v1.ResourceCPU]
172+
reserved := cra.conf.GetDynamicConfiguration().GetReservedResourceForAllocate(v1.ResourceList{
173+
v1.ResourceCPU: *resource.NewQuantity(int64(cra.metaServer.NumCPUs), resource.DecimalSI),
174+
})[v1.ResourceCPU]
175+
172176
return float64(reserved.Value()*int64(numas.Size())) / float64(cra.metaServer.NumNUMANodes)
173177
}
174178

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,9 @@ func (ha *HeadroomAssemblerCommon) GetHeadroom() (resource.Quantity, error) {
7878
return *resource.NewQuantity(0, resource.DecimalSI), nil
7979
}
8080

81-
reserved := ha.conf.GetDynamicConfiguration().ReservedResourceForAllocate[v1.ResourceCPU]
81+
reserved := ha.conf.GetDynamicConfiguration().GetReservedResourceForAllocate(v1.ResourceList{
82+
v1.ResourceCPU: *resource.NewQuantity(int64(ha.metaServer.NumCPUs), resource.DecimalSI),
83+
})[v1.ResourceCPU]
8284
headroomTotal := 0.0
8385
emptyNUMAs := ha.metaServer.CPUDetails.NUMANodes()
8486
exclusiveNUMAs := machine.NewCPUSet()

pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,9 @@ func (ra *memoryResourceAdvisor) update() error {
205205
return fmt.Errorf("meta reader has not synced")
206206
}
207207

208-
reservedForAllocate := ra.conf.GetDynamicConfiguration().
209-
ReservedResourceForAllocate[v1.ResourceMemory]
208+
reservedForAllocate := ra.conf.GetDynamicConfiguration().GetReservedResourceForAllocate(v1.ResourceList{
209+
v1.ResourceMemory: *resource.NewQuantity(int64(ra.metaServer.MemoryCapacity), resource.BinarySI),
210+
})[v1.ResourceMemory]
210211

211212
for _, headroomPolicy := range ra.headroomPolices {
212213
// capacity and reserved can both be adjusted dynamically during running process

pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/memory_provisioner.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"sync"
2424

2525
v1 "k8s.io/api/core/v1"
26+
"k8s.io/apimachinery/pkg/api/resource"
2627

2728
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/memoryadvisor"
2829
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache"
@@ -96,8 +97,9 @@ func (m *memoryProvisioner) initializeMemoryProvisioner() error {
9697
}
9798

9899
func (m *memoryProvisioner) Reconcile(status *types.MemoryPressureStatus) (err error) {
99-
reservedForAllocate := m.conf.GetDynamicConfiguration().
100-
ReservedResourceForAllocate[v1.ResourceMemory]
100+
reservedForAllocate := m.conf.GetDynamicConfiguration().GetReservedResourceForAllocate(v1.ResourceList{
101+
v1.ResourceMemory: *resource.NewQuantity(int64(m.metaServer.MemoryCapacity), resource.BinarySI),
102+
})[v1.ResourceMemory]
101103
m.policy.SetEssentials(
102104
types.ResourceEssentials{
103105
EnableReclaim: m.conf.GetDynamicConfiguration().EnableReclaim,

pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ package reclaimedresource
1818

1919
import (
2020
v1 "k8s.io/api/core/v1"
21+
"k8s.io/klog/v2"
2122

2223
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/reclaimedresource/cpuheadroom"
2324
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/reclaimedresource/memoryheadroom"
2425
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/crd"
26+
"github.com/kubewharf/katalyst-core/pkg/util/native"
2527
)
2628

2729
type ReclaimedResourceConfiguration struct {
@@ -30,6 +32,7 @@ type ReclaimedResourceConfiguration struct {
3032
MinReclaimedResourceForReport v1.ResourceList
3133
ReservedResourceForAllocate v1.ResourceList
3234
MinReclaimedResourceForAllocate v1.ResourceList
35+
MaxNodeUtilizationPercent map[v1.ResourceName]int64
3336

3437
*cpuheadroom.CPUHeadroomConfiguration
3538
*memoryheadroom.MemoryHeadroomConfiguration
@@ -72,8 +75,50 @@ func (c *ReclaimedResourceConfiguration) ApplyConfiguration(conf *crd.DynamicCon
7275
c.MinReclaimedResourceForAllocate[resourceName] = value
7376
}
7477
}
78+
79+
if config.MaxNodeUtilizationPercent != nil {
80+
for resourceName, value := range config.MaxNodeUtilizationPercent {
81+
c.MaxNodeUtilizationPercent[resourceName] = value
82+
}
83+
}
7584
}
7685

7786
c.CPUHeadroomConfiguration.ApplyConfiguration(conf)
7887
c.MemoryHeadroomConfiguration.ApplyConfiguration(conf)
7988
}
89+
90+
func (c *ReclaimedResourceConfiguration) GetReservedResourceForAllocate(nodeResourceList v1.ResourceList) v1.ResourceList {
91+
if len(c.MaxNodeUtilizationPercent) == 0 {
92+
return c.ReservedResourceForAllocate
93+
}
94+
95+
res := v1.ResourceList{}
96+
for resource, quantity := range c.ReservedResourceForAllocate {
97+
nodeAllocatable, ok := nodeResourceList[resource]
98+
if !ok {
99+
res[resource] = quantity
100+
continue
101+
}
102+
103+
maxUtil, ok := c.MaxNodeUtilizationPercent[resource]
104+
if !ok {
105+
res[resource] = quantity
106+
continue
107+
}
108+
if maxUtil <= 0 || maxUtil > 100 {
109+
klog.Warningf("unsupported MaxNodeUtilizationPercent, resourceName: %v, value: %v", resource, maxUtil)
110+
res[resource] = quantity
111+
continue
112+
}
113+
114+
nodeAllocatableCopy := nodeAllocatable.DeepCopy()
115+
nodeAllocatableCopy.Sub(native.MultiplyResourceQuantity(resource, nodeAllocatableCopy, float64(maxUtil)/100.0))
116+
res[resource] = nodeAllocatableCopy
117+
118+
klog.V(6).Infof("GetReservedResourceForAllocate resource: %v, nodeResource: %v, "+
119+
"MaxNodeUtilizationPercent: %v, ReservedResourceForAllocate: %v, res: %v",
120+
resource, nodeAllocatable.String(), maxUtil, quantity.String(), nodeAllocatableCopy.String())
121+
}
122+
123+
return res
124+
}
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package reclaimedresource
18+
19+
import (
20+
"testing"
21+
22+
"github.com/stretchr/testify/assert"
23+
v1 "k8s.io/api/core/v1"
24+
"k8s.io/apimachinery/pkg/api/resource"
25+
)
26+
27+
func TestGetReservedResourceForAllocate(t *testing.T) {
28+
t.Parallel()
29+
30+
for _, tc := range []struct {
31+
name string
32+
config *ReclaimedResourceConfiguration
33+
nodeResourceList v1.ResourceList
34+
expectRes v1.ResourceList
35+
}{
36+
{
37+
name: "MaxNodeUtilizationPercent not set",
38+
config: &ReclaimedResourceConfiguration{
39+
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
40+
v1.ResourceCPU: resource.MustParse("4"),
41+
v1.ResourceMemory: resource.MustParse("8Gi"),
42+
},
43+
},
44+
nodeResourceList: map[v1.ResourceName]resource.Quantity{},
45+
expectRes: map[v1.ResourceName]resource.Quantity{
46+
v1.ResourceCPU: resource.MustParse("4"),
47+
v1.ResourceMemory: resource.MustParse("8Gi"),
48+
},
49+
},
50+
{
51+
name: "MaxNodeUtilizationPercent set, only get cpu",
52+
config: &ReclaimedResourceConfiguration{
53+
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
54+
v1.ResourceCPU: resource.MustParse("4"),
55+
v1.ResourceMemory: resource.MustParse("8Gi"),
56+
},
57+
MaxNodeUtilizationPercent: map[v1.ResourceName]int64{
58+
v1.ResourceCPU: 60,
59+
v1.ResourceMemory: 80,
60+
},
61+
},
62+
nodeResourceList: map[v1.ResourceName]resource.Quantity{
63+
v1.ResourceCPU: resource.MustParse("32"),
64+
},
65+
expectRes: map[v1.ResourceName]resource.Quantity{
66+
v1.ResourceCPU: resource.MustParse("12800m"),
67+
v1.ResourceMemory: resource.MustParse("8Gi"),
68+
},
69+
},
70+
{
71+
name: "MaxNodeUtilizationPercent set, only get memory",
72+
config: &ReclaimedResourceConfiguration{
73+
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
74+
v1.ResourceCPU: resource.MustParse("4"),
75+
v1.ResourceMemory: resource.MustParse("8Gi"),
76+
},
77+
MaxNodeUtilizationPercent: map[v1.ResourceName]int64{
78+
v1.ResourceCPU: 60,
79+
v1.ResourceMemory: 80,
80+
},
81+
},
82+
nodeResourceList: map[v1.ResourceName]resource.Quantity{
83+
v1.ResourceMemory: resource.MustParse("100Gi"),
84+
},
85+
expectRes: map[v1.ResourceName]resource.Quantity{
86+
v1.ResourceCPU: resource.MustParse("4"),
87+
v1.ResourceMemory: resource.MustParse("20Gi"),
88+
},
89+
},
90+
{
91+
name: "MaxNodeUtilizationPercent only cpu set",
92+
config: &ReclaimedResourceConfiguration{
93+
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
94+
v1.ResourceCPU: resource.MustParse("4"),
95+
v1.ResourceMemory: resource.MustParse("8Gi"),
96+
},
97+
MaxNodeUtilizationPercent: map[v1.ResourceName]int64{
98+
v1.ResourceMemory: 80,
99+
},
100+
},
101+
nodeResourceList: map[v1.ResourceName]resource.Quantity{
102+
v1.ResourceCPU: resource.MustParse("32"),
103+
},
104+
expectRes: map[v1.ResourceName]resource.Quantity{
105+
v1.ResourceCPU: resource.MustParse("4"),
106+
v1.ResourceMemory: resource.MustParse("8Gi"),
107+
},
108+
},
109+
{
110+
name: "MaxNodeUtilizationPercent value not supported",
111+
config: &ReclaimedResourceConfiguration{
112+
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
113+
v1.ResourceCPU: resource.MustParse("4"),
114+
v1.ResourceMemory: resource.MustParse("8Gi"),
115+
},
116+
MaxNodeUtilizationPercent: map[v1.ResourceName]int64{
117+
v1.ResourceCPU: 120,
118+
v1.ResourceMemory: 0,
119+
},
120+
},
121+
nodeResourceList: map[v1.ResourceName]resource.Quantity{
122+
v1.ResourceCPU: resource.MustParse("32"),
123+
v1.ResourceMemory: resource.MustParse("100Gi"),
124+
},
125+
expectRes: map[v1.ResourceName]resource.Quantity{
126+
v1.ResourceCPU: resource.MustParse("4"),
127+
v1.ResourceMemory: resource.MustParse("8Gi"),
128+
},
129+
},
130+
} {
131+
tc := tc
132+
t.Run(tc.name, func(t *testing.T) {
133+
t.Parallel()
134+
135+
res := tc.config.GetReservedResourceForAllocate(tc.nodeResourceList)
136+
assert.Equal(t, len(tc.expectRes), len(res))
137+
for resource, quantity := range res {
138+
expectQuan, ok := tc.expectRes[resource]
139+
assert.True(t, ok)
140+
141+
assert.True(t, quantity.Equal(expectQuan))
142+
}
143+
})
144+
}
145+
}

0 commit comments

Comments
 (0)