Skip to content

Commit a93e3e7

Browse files
authored
Merge pull request kubernetes#127483 from nokia/strict-cpu-reservation-core
KEP-4540: Add CPUManager policy option to restrict reservedSystemCPUs to system daemons and interrupt processing
2 parents d702d26 + f7e9766 commit a93e3e7

File tree

4 files changed

+98
-20
lines changed

4 files changed

+98
-20
lines changed

pkg/kubelet/cm/cpumanager/policy_options.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,15 @@ const (
3333
DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa"
3434
AlignBySocketOption string = "align-by-socket"
3535
DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores"
36+
StrictCPUReservationOption string = "strict-cpu-reservation"
3637
)
3738

3839
var (
3940
alphaOptions = sets.New[string](
4041
DistributeCPUsAcrossNUMAOption,
4142
AlignBySocketOption,
4243
DistributeCPUsAcrossCoresOption,
44+
StrictCPUReservationOption,
4345
)
4446
betaOptions = sets.New[string](
4547
FullPCPUsOnlyOption,
@@ -86,6 +88,8 @@ type StaticPolicyOptions struct {
8688
// cpus (HT) on different physical core.
8789
// This is a preferred policy, so do not throw an error if they have to be packed in one physical core.
8890
DistributeCPUsAcrossCores bool
91+
// Flag to remove the reserved CPUs from the set of CPUs used to initialize the default cpuset
92+
StrictCPUReservation bool
8993
}
9094

9195
// NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration.
@@ -121,7 +125,12 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
121125
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
122126
}
123127
opts.DistributeCPUsAcrossCores = optValue
124-
128+
case StrictCPUReservationOption:
129+
optValue, err := strconv.ParseBool(value)
130+
if err != nil {
131+
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
132+
}
133+
opts.StrictCPUReservation = optValue
125134
default:
126135
// this should never be reached, we already detect unknown options,
127136
// but we keep it as further safety.

pkg/kubelet/cm/cpumanager/policy_options_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,18 @@ func TestPolicyOptionsAvailable(t *testing.T) {
118118
featureGateEnable: true,
119119
expectedAvailable: false,
120120
},
121+
{
122+
option: StrictCPUReservationOption,
123+
featureGate: pkgfeatures.CPUManagerPolicyAlphaOptions,
124+
featureGateEnable: true,
125+
expectedAvailable: true,
126+
},
127+
{
128+
option: StrictCPUReservationOption,
129+
featureGate: pkgfeatures.CPUManagerPolicyBetaOptions,
130+
featureGateEnable: true,
131+
expectedAvailable: false,
132+
},
121133
}
122134
for _, testCase := range testCases {
123135
t.Run(testCase.option, func(t *testing.T) {

pkg/kubelet/cm/cpumanager/policy_static.go

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -202,24 +202,36 @@ func (p *staticPolicy) validateState(s state.State) error {
202202
tmpAssignments := s.GetCPUAssignments()
203203
tmpDefaultCPUset := s.GetDefaultCPUSet()
204204

205+
allCPUs := p.topology.CPUDetails.CPUs()
206+
if p.options.StrictCPUReservation {
207+
allCPUs = allCPUs.Difference(p.reservedCPUs)
208+
}
209+
205210
// Default cpuset cannot be empty when assignments exist
206211
if tmpDefaultCPUset.IsEmpty() {
207212
if len(tmpAssignments) != 0 {
208213
return fmt.Errorf("default cpuset cannot be empty")
209214
}
210215
// state is empty initialize
211-
allCPUs := p.topology.CPUDetails.CPUs()
212216
s.SetDefaultCPUSet(allCPUs)
217+
klog.InfoS("Static policy initialized", "defaultCPUSet", allCPUs)
213218
return nil
214219
}
215220

216221
// State has already been initialized from file (is not empty)
217222
// 1. Check that the reserved cpuset relates to the default cpuset as expected (fully included, or fully excluded with StrictCPUReservation); a mismatch can happen because:
218223
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
219224
// - user tampered with file
220-
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
221-
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
222-
p.reservedCPUs.String(), tmpDefaultCPUset.String())
225+
if p.options.StrictCPUReservation {
226+
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).IsEmpty() {
227+
return fmt.Errorf("some of strictly reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
228+
p.reservedCPUs.Intersection(tmpDefaultCPUset).String(), tmpDefaultCPUset.String())
229+
}
230+
} else {
231+
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
232+
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
233+
p.reservedCPUs.String(), tmpDefaultCPUset.String())
234+
}
223235
}
224236

225237
// 2. Check if state for static policy is consistent
@@ -248,9 +260,10 @@ func (p *staticPolicy) validateState(s state.State) error {
248260
}
249261
}
250262
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
251-
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
263+
if !totalKnownCPUs.Equals(allCPUs) {
252264
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
253-
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
265+
allCPUs.String(), totalKnownCPUs.String())
266+
254267
}
255268

256269
return nil

pkg/kubelet/cm/cpumanager/policy_static_test.go

Lines changed: 57 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,15 @@ func TestStaticPolicyStart(t *testing.T) {
107107
stDefaultCPUSet: cpuset.New(0, 1),
108108
expErr: fmt.Errorf("not all reserved cpus: \"0,6\" are present in defaultCpuSet: \"0-1\""),
109109
},
110+
{
111+
description: "some of reserved cores are present in available cpuset (StrictCPUReservationOption)",
112+
topo: topoDualSocketHT,
113+
numReservedCPUs: 2,
114+
options: map[string]string{StrictCPUReservationOption: "true"},
115+
stAssignments: state.ContainerCPUAssignments{},
116+
stDefaultCPUSet: cpuset.New(0, 1),
117+
expErr: fmt.Errorf("some of strictly reserved cpus: \"0\" are present in defaultCpuSet: \"0-1\""),
118+
},
110119
{
111120
description: "assigned core 2 is still present in available cpuset",
112121
topo: topoDualSocketHT,
@@ -118,6 +127,18 @@ func TestStaticPolicyStart(t *testing.T) {
118127
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
119128
expErr: fmt.Errorf("pod: fakePod, container: 0 cpuset: \"0-2\" overlaps with default cpuset \"2-11\""),
120129
},
130+
{
131+
description: "assigned core 2 is still present in available cpuset (StrictCPUReservationOption)",
132+
topo: topoDualSocketHT,
133+
options: map[string]string{StrictCPUReservationOption: "true"},
134+
stAssignments: state.ContainerCPUAssignments{
135+
"fakePod": map[string]cpuset.CPUSet{
136+
"0": cpuset.New(0, 1, 2),
137+
},
138+
},
139+
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
140+
expErr: fmt.Errorf("pod: fakePod, container: 0 cpuset: \"0-2\" overlaps with default cpuset \"2-11\""),
141+
},
121142
{
122143
description: "core 12 is not present in topology but is in state cpuset",
123144
topo: topoDualSocketHT,
@@ -145,7 +166,8 @@ func TestStaticPolicyStart(t *testing.T) {
145166
}
146167
for _, testCase := range testCases {
147168
t.Run(testCase.description, func(t *testing.T) {
148-
p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
169+
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true)
170+
p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), testCase.options)
149171
policy := p.(*staticPolicy)
150172
st := &mockState{
151173
assignments: testCase.stAssignments,
@@ -939,17 +961,18 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
939961
// above test cases are without kubelet --reserved-cpus cmd option
940962
// the following tests are with --reserved-cpus configured
941963
type staticPolicyTestWithResvList struct {
942-
description string
943-
topo *topology.CPUTopology
944-
numReservedCPUs int
945-
reserved cpuset.CPUSet
946-
stAssignments state.ContainerCPUAssignments
947-
stDefaultCPUSet cpuset.CPUSet
948-
pod *v1.Pod
949-
expErr error
950-
expNewErr error
951-
expCPUAlloc bool
952-
expCSet cpuset.CPUSet
964+
description string
965+
topo *topology.CPUTopology
966+
numReservedCPUs int
967+
reserved cpuset.CPUSet
968+
cpuPolicyOptions map[string]string
969+
stAssignments state.ContainerCPUAssignments
970+
stDefaultCPUSet cpuset.CPUSet
971+
pod *v1.Pod
972+
expErr error
973+
expNewErr error
974+
expCPUAlloc bool
975+
expCSet cpuset.CPUSet
953976
}
954977

955978
func TestStaticPolicyStartWithResvList(t *testing.T) {
@@ -963,6 +986,16 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
963986
stDefaultCPUSet: cpuset.New(),
964987
expCSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
965988
},
989+
{
990+
description: "empty cpuset with StrictCPUReservationOption enabled",
991+
topo: topoDualSocketHT,
992+
numReservedCPUs: 2,
993+
reserved: cpuset.New(0, 1),
994+
cpuPolicyOptions: map[string]string{StrictCPUReservationOption: "true"},
995+
stAssignments: state.ContainerCPUAssignments{},
996+
stDefaultCPUSet: cpuset.New(),
997+
expCSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
998+
},
966999
{
9671000
description: "reserved cores 0 & 1 are not present in available cpuset",
9681001
topo: topoDualSocketHT,
@@ -972,6 +1005,16 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
9721005
stDefaultCPUSet: cpuset.New(2, 3, 4, 5),
9731006
expErr: fmt.Errorf("not all reserved cpus: \"0-1\" are present in defaultCpuSet: \"2-5\""),
9741007
},
1008+
{
1009+
description: "reserved cores 0 & 1 are present in available cpuset with StrictCPUReservationOption enabled",
1010+
topo: topoDualSocketHT,
1011+
numReservedCPUs: 2,
1012+
reserved: cpuset.New(0, 1),
1013+
cpuPolicyOptions: map[string]string{StrictCPUReservationOption: "true"},
1014+
stAssignments: state.ContainerCPUAssignments{},
1015+
stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5),
1016+
expErr: fmt.Errorf("some of strictly reserved cpus: \"0-1\" are present in defaultCpuSet: \"0-5\""),
1017+
},
9751018
{
9761019
description: "inconsistency between numReservedCPUs and reserved",
9771020
topo: topoDualSocketHT,
@@ -984,7 +1027,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
9841027
}
9851028
for _, testCase := range testCases {
9861029
t.Run(testCase.description, func(t *testing.T) {
987-
p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
1030+
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true)
1031+
p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testCase.cpuPolicyOptions)
9881032
if !reflect.DeepEqual(err, testCase.expNewErr) {
9891033
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
9901034
testCase.description, testCase.expNewErr, err)

0 commit comments

Comments
 (0)