
Commit 596c233

klihub authored and askervin committed
topology-aware: don't slice busy shared pools empty.
Update allocation to account for an extra milliCPU of capacity if a node has BestEffort containers assigned to the shared pool of the node or any of its children. When slicing exclusive CPUs from a shared pool, make sure we don't slice any of the busy shared pools empty.

Signed-off-by: Krisztian Litkey <krisztian.litkey@intel.com>
1 parent 00c774b commit 596c233
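
To make the commit message concrete, here is a minimal hand-written Go sketch of the two rules it describes (not part of the commit; the function, its parameters, and the numbers are illustrative, and only the "strictly more than 1000*full milliCPU must remain" guard comes from the actual code below):

package main

import "fmt"

// allocatableSharedMilliCPU mimics the adjusted accounting: if the pool or any
// of its children hosts containers that live in the shared pool without a CPU
// request (BestEffort, or Burstable with a 0 CPU request), report one milliCPU
// less than is actually free.
func allocatableSharedMilliCPU(freeMilliCPU int, hasZeroCPUReqContainers bool) int {
	if hasZeroCPUReqContainers {
		return freeMilliCPU - 1
	}
	return freeMilliCPU
}

func main() {
	// A pool with exactly 2 full CPUs of free shared capacity that still
	// hosts a BestEffort container.
	allocatable := allocatableSharedMilliCPU(2000, true)
	full := 2 // full exclusive CPUs requested

	// Exclusive CPUs are only sliced off when strictly more than the requested
	// amount remains, so this busy pool is not sliced empty.
	fmt.Println(allocatable)             // 1999
	fmt.Println(allocatable > 1000*full) // false
}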

File tree: 2 files changed (+126, -5 lines)


cmd/plugins/topology-aware/policy/pools.go

Lines changed: 36 additions & 0 deletions
@@ -625,6 +625,42 @@ func (p *policy) updateSharedAllocations(grant *Grant) {
 	}
 }
 
+func (p *policy) foreachContainer(pool Node, fn func(ctr cache.Container) bool) {
+	for _, g := range p.allocations.grants {
+		if !pool.IsSameNode(g.GetCPUNode()) {
+			continue
+		}
+		if done := fn(g.GetContainer()); done {
+			return
+		}
+	}
+}
+
+func (p *policy) hasZeroCpuReqContainer(pool Node) bool {
+	found := false
+
+	p.foreachContainer(pool, func(ctr cache.Container) bool {
+		switch ctr.GetQOSClass() {
+		case corev1.PodQOSBestEffort:
+			found = true
+			return true
+		case corev1.PodQOSBurstable:
+			resources, ok := ctr.GetResourceUpdates()
+			if !ok {
+				resources = ctr.GetResourceRequirements()
+			}
+			request := resources.Requests[corev1.ResourceCPU]
+			if request.MilliValue() == 0 {
+				found = true
+				return true
+			}
+		}
+		return false
+	})
+
+	return found
+}
+
 // Score pools against the request and sort them by score.
 func (p *policy) sortPoolsByScore(req Request, aff map[int]int32) (map[int]Score, []Node) {
 	scores := make(map[int]Score, p.nodeCnt)
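
The Burstable branch in hasZeroCpuReqContainer matters because a pod can land in the Burstable QoS class through a memory-only request, leaving its CPU request at zero. A small self-contained sketch of that case (not part of the commit; the helper and the values are illustrative, only the corev1 and resource types are real):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// hasZeroCPURequest mirrors the zero-request check on plain resource requirements.
func hasZeroCPURequest(res corev1.ResourceRequirements) bool {
	request := res.Requests[corev1.ResourceCPU]
	return request.MilliValue() == 0
}

func main() {
	// Memory-only request: the pod ends up Burstable, yet requests 0m CPU,
	// so its container still runs in the shared pool.
	memOnly := corev1.ResourceRequirements{
		Requests: corev1.ResourceList{
			corev1.ResourceMemory: resource.MustParse("256Mi"),
		},
	}
	// Explicit CPU request: not a zero-CPU-request container.
	withCPU := corev1.ResourceRequirements{
		Requests: corev1.ResourceList{
			corev1.ResourceCPU: resource.MustParse("500m"),
		},
	}

	fmt.Println(hasZeroCPURequest(memOnly)) // true
	fmt.Println(hasZeroCPURequest(withCPU)) // false
}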

cmd/plugins/topology-aware/policy/resources.go

Lines changed: 90 additions & 5 deletions
@@ -15,6 +15,7 @@
 package topologyaware
 
 import (
+	"errors"
 	"fmt"
 	"strconv"
 	"time"
@@ -78,6 +79,8 @@ type Supply interface {
 	GetScore(Request) Score
 	// AllocatableSharedCPU calculates the allocatable amount of shared CPU of this supply.
 	AllocatableSharedCPU(...bool) int
+	// SliceableCPUs calculates the shared cpuset we can slice exclusive CPUs off of.
+	SliceableCPUs() (cpuset.CPUSet, error)
 	// Allocate allocates a grant from the supply.
 	Allocate(Request, *libmem.Offer) (Grant, map[string]libmem.NodeMask, error)
 	// ReleaseCPU releases a previously allocated CPU grant from this supply.
@@ -433,19 +436,28 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) {
 		}
 
 	case full > 0 && cs.AllocatableSharedCPU() > 1000*full:
+		sliceable, err := cs.SliceableCPUs()
+		if err != nil {
+			return nil, policyError("internal error: "+
+				"%s: can't take %d exclusive CPUs from shared %s: %v",
+				cs.node.Name(), full, cs.SharableCPUs(), err)
+		}
+
+		log.Debug("%s: sliceable cpuset is %s", cs.node.Name(), sliceable)
 		if cr.PickByHints() {
-			exclusive, ok = cs.takeCPUsByHints(&cs.sharable, cr)
+			exclusive, ok = cs.takeCPUsByHints(&sliceable, cr)
 			if !ok {
-				exclusive, err = cs.takeCPUs(&cs.sharable, nil, full, cr.CPUPrio())
+				exclusive, err = cs.takeCPUs(&sliceable, nil, full, cr.CPUPrio())
 			}
 		} else {
-			exclusive, err = cs.takeCPUs(&cs.sharable, nil, full, cr.CPUPrio())
+			exclusive, err = cs.takeCPUs(&sliceable, nil, full, cr.CPUPrio())
 		}
 		if err != nil {
 			return nil, policyError("internal error: "+
 				"%s: can't take %d exclusive CPUs from %s: %v",
-				cs.node.Name(), full, cs.sharable, err)
+				cs.node.Name(), full, sliceable, err)
 		}
+		cs.sharable = cs.sharable.Difference(exclusive)
 
 	case full > 0:
 		return nil, policyError("internal error: "+
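
The cpuset bookkeeping in this hunk boils down to ordinary set arithmetic: exclusive CPUs are now picked from the restricted sliceable set, then removed from the full sharable set. A minimal sketch of that shape (not from the commit; the CPU ids are made up, k8s.io/utils/cpuset stands in for the repository's cpuset import, and the real code picks CPUs via takeCPUs()/takeCPUsByHints()):

package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

func main() {
	sharable := cpuset.New(0, 1, 2, 3, 4, 5, 6, 7) // the pool's shared CPUs
	sliceable := cpuset.New(4, 5, 6, 7)            // what SliceableCPUs would offer
	exclusive := cpuset.New(6, 7)                  // picked for a 2-CPU exclusive request

	// Candidates come only from the sliceable subset...
	fmt.Println(exclusive.IsSubsetOf(sliceable)) // true

	// ...but the granted CPUs are removed from the whole sharable set.
	sharable = sharable.Difference(exclusive)
	fmt.Println(sharable) // 0-5: the busy CPUs 0-3 were never candidates
}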
@@ -673,6 +685,10 @@ func (cs *supply) DumpAllocatable() string {
 			sep = ", "
 		}
 		cpu += sep + fmt.Sprintf("allocatable:%dm)", cs.AllocatableSharedCPU(true))
+
+		sliceable, _ := cs.SliceableCPUs()
+		cpu += fmt.Sprintf("/sliceable:%s (%dm)", kubernetes.ShortCPUSet(sliceable),
+			1000*sliceable.Size())
 	}
 
 	allocatable := "<" + cs.node.Name() + " allocatable: "
@@ -997,12 +1013,81 @@ func (cs *supply) AllocatableSharedCPU(quiet ...bool) int {
 			shared = pShared
 		}
 	}
+
 	if verbose {
 		log.Debug("%s: ancestor-adjusted free shared CPU: %dm", cs.node.Name(), shared)
 	}
+
+	// If there are BestEffort or 0 CPU request Burstable containers in the node
+	// or any of its children we need to account for them an extra milliCPU worth
+	// of shared capacity.
+	//
+	// TODO(klihub): We might need to try speeding this up if it gets too slow.
+	// Obvious optimizations would be to 1) allow {Breadth,Depth}First to stop
+	// early if possible, and 2) store grants per assigned node.
+	hasZeroCpuReqs := false
+	cs.node.BreadthFirst(func(n Node) {
+		if cs.node.Policy().hasZeroCpuReqContainer(n) {
+			hasZeroCpuReqs = true
+		}
+	})
+	if hasZeroCpuReqs {
+		shared--
+		if verbose {
+			log.Debug("%s: 0 CPU req-adjusted free shared CPU: %dm",
+				cs.node.Name(), shared)
+		}
+	}
+
 	return shared
 }
 
+// SliceableCPUs calculates the shared cpuset we can slice exclusive CPUs off of.
+func (cs *supply) SliceableCPUs() (cpuset.CPUSet, error) {
+	var (
+		sliceable = cpuset.New()
+		errs      []error
+	)
+
+	// We need to avoid slicing any shared pool in child nodes below its
+	// current allocation. To do so we go through the subtree collecting
+	// CPUs from shared pools for allocation but leaving off enough full
+	// CPUs in each for their current BestEffort and Burstable QoS class
+	// shared allocations.
+
+	cs.node.DepthFirst(func(n Node) {
+		if n.IsSameNode(cs.node) && !n.IsLeafNode() {
+			return
+		}
+
+		ns := n.FreeSupply()
+		if ns == nil {
+			return
+		}
+
+		cpus := ns.SharableCPUs()
+		free := ns.AllocatableSharedCPU(true) / 1000
+
+		// TODO(klihub): We should ideally take also into account the CPU
+		// priority preference of any ongoing allocation, trying to slice
+		// CPUs with a matching preference. We don't do that ATM.
+
+		cset, err := cs.takeCPUs(&cpus, nil, free, nonePrio)
+		if err != nil {
+			errs = append(errs, err)
+			return
+		}
+
+		sliceable = sliceable.Union(cset)
+	})
+
+	if len(errs) > 0 {
+		return cpuset.New(), errors.Join(errs...)
+	}
+
+	return sliceable, nil
+}
+
 // Eval...
 func (score *score) Eval() float64 {
 	return 1.0
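
A rough numeric illustration of the hold-back that SliceableCPUs performs per shared pool (all pool sizes, CPU ids, and allocations below are made up; k8s.io/utils/cpuset stands in for the repository's cpuset import, and a trivial pick replaces the real takeCPUs() allocator):

package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

// pool is an illustrative stand-in for a child node's shared pool.
type pool struct {
	sharable         cpuset.CPUSet // the pool's shared CPUs
	allocatableMilli int           // free shared capacity in milliCPU
}

func main() {
	pools := []pool{
		// 4 shared CPUs with 2500m already granted: only 1 whole CPU may be
		// sliced off, the other 3 stay behind to back the existing grants.
		{cpuset.New(0, 1, 2, 3), 1500},
		// 4 completely free shared CPUs: all of them are sliceable.
		{cpuset.New(4, 5, 6, 7), 4000},
	}

	sliceable := cpuset.New()
	for _, p := range pools {
		free := p.allocatableMilli / 1000 // whole free CPUs in this pool
		ids := p.sharable.List()
		// Keep it simple: take the highest-numbered 'free' CPUs.
		sliceable = sliceable.Union(cpuset.New(ids[len(ids)-free:]...))
	}

	fmt.Println(sliceable) // 3-7: pool one keeps CPUs 0-2 for its busy shared grants
}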
@@ -1199,7 +1284,7 @@ func (cg *grant) String() string {
 		reserved = fmt.Sprintf(", reserved: %s (%dm)",
 			cg.node.FreeSupply().ReservedCPUs(), cg.ReservedPortion())
 	}
-	if cg.SharedPortion() > 0 {
+	if cg.SharedPortion() > 0 || (isol.IsEmpty() && cg.exclusive.IsEmpty()) {
 		shared = fmt.Sprintf(", shared: %s (%dm)",
 			cg.node.FreeSupply().SharableCPUs(), cg.SharedPortion())
 	}

0 commit comments
