Skip to content

Commit 0365cf4

Browse files
jingczhang authored and psasnal committed
KEP-4540: Add CPUManager policy option strict-cpu-reservation
Signed-off-by: Jing Zhang <[email protected]>
1 parent be8ea98 commit 0365cf4

File tree

2 files changed

+33
-7
lines changed

2 files changed

+33
-7
lines changed

pkg/kubelet/cm/cpumanager/policy_options.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,15 @@ const (
3333
DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa"
3434
AlignBySocketOption string = "align-by-socket"
3535
DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores"
36+
StrictCPUReservationOption string = "strict-cpu-reservation"
3637
)
3738

3839
var (
3940
alphaOptions = sets.New[string](
4041
DistributeCPUsAcrossNUMAOption,
4142
AlignBySocketOption,
4243
DistributeCPUsAcrossCoresOption,
44+
StrictCPUReservationOption,
4345
)
4446
betaOptions = sets.New[string](
4547
FullPCPUsOnlyOption,
@@ -86,6 +88,8 @@ type StaticPolicyOptions struct {
8688
// cpus (HT) on different physical core.
8789
// This is a preferred policy, so do not throw an error if they have to be packed in one physical core.
8890
DistributeCPUsAcrossCores bool
91+
// Flag to remove reserved cores from the list of available cores
92+
StrictCPUReservation bool
8993
}
9094

9195
// NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration.
@@ -121,7 +125,12 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
121125
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
122126
}
123127
opts.DistributeCPUsAcrossCores = optValue
124-
128+
case StrictCPUReservationOption:
129+
optValue, err := strconv.ParseBool(value)
130+
if err != nil {
131+
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
132+
}
133+
opts.StrictCPUReservation = optValue
125134
default:
126135
// this should never be reached, we already detect unknown options,
127136
// but we keep it as further safety.

pkg/kubelet/cm/cpumanager/policy_static.go

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,24 +195,36 @@ func (p *staticPolicy) validateState(s state.State) error {
195195
tmpAssignments := s.GetCPUAssignments()
196196
tmpDefaultCPUset := s.GetDefaultCPUSet()
197197

198+
allCPUs := p.topology.CPUDetails.CPUs()
199+
if p.options.StrictCPUReservation {
200+
allCPUs = allCPUs.Difference(p.reservedCPUs)
201+
}
202+
198203
// Default cpuset cannot be empty when assignments exist
199204
if tmpDefaultCPUset.IsEmpty() {
200205
if len(tmpAssignments) != 0 {
201206
return fmt.Errorf("default cpuset cannot be empty")
202207
}
203208
// state is empty initialize
204-
allCPUs := p.topology.CPUDetails.CPUs()
205209
s.SetDefaultCPUSet(allCPUs)
210+
klog.InfoS("Static policy initialized", "defaultCPUSet", allCPUs)
206211
return nil
207212
}
208213

209214
// State has already been initialized from file (is not empty)
210215
// 1. Check if the reserved cpuset is not part of default cpuset because:
211216
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
212217
// - user tampered with file
213-
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
214-
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
215-
p.reservedCPUs.String(), tmpDefaultCPUset.String())
218+
if p.options.StrictCPUReservation {
219+
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).IsEmpty() {
220+
return fmt.Errorf("some of strictly reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
221+
p.reservedCPUs.Intersection(tmpDefaultCPUset).String(), tmpDefaultCPUset.String())
222+
}
223+
} else {
224+
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
225+
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
226+
p.reservedCPUs.String(), tmpDefaultCPUset.String())
227+
}
216228
}
217229

218230
// 2. Check if state for static policy is consistent
@@ -235,15 +247,20 @@ func (p *staticPolicy) validateState(s state.State) error {
235247
// the set of CPUs stored in the state.
236248
totalKnownCPUs := tmpDefaultCPUset.Clone()
237249
tmpCPUSets := []cpuset.CPUSet{}
250+
tmpAllCPUs := p.topology.CPUDetails.CPUs()
238251
for pod := range tmpAssignments {
239252
for _, cset := range tmpAssignments[pod] {
240253
tmpCPUSets = append(tmpCPUSets, cset)
241254
}
242255
}
243256
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
244-
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
257+
if p.options.StrictCPUReservation {
258+
tmpAllCPUs = tmpAllCPUs.Difference(p.reservedCPUs)
259+
}
260+
if !totalKnownCPUs.Equals(tmpAllCPUs) {
245261
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
246-
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
262+
tmpAllCPUs.String(), totalKnownCPUs.String())
263+
247264
}
248265

249266
return nil

0 commit comments

Comments
 (0)