 package topologyaware

 import (
+	"errors"
 	"fmt"
 	"strconv"
 	"time"
@@ -78,6 +79,8 @@ type Supply interface {
 	GetScore(Request) Score
 	// AllocatableSharedCPU calculates the allocatable amount of shared CPU of this supply.
 	AllocatableSharedCPU(...bool) int
+	// SliceableCPUs calculates the shared cpuset we can slice exclusive CPUs off of.
+	SliceableCPUs() (cpuset.CPUSet, error)
 	// Allocate allocates a grant from the supply.
 	Allocate(Request, *libmem.Offer) (Grant, map[string]libmem.NodeMask, error)
 	// ReleaseCPU releases a previously allocated CPU grant from this supply.
@@ -433,19 +436,28 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) {
 		}

 	case full > 0 && cs.AllocatableSharedCPU() > 1000*full:
+		sliceable, err := cs.SliceableCPUs()
+		if err != nil {
+			return nil, policyError("internal error: "+
+				"%s: can't take %d exclusive CPUs from shared %s: %v",
+				cs.node.Name(), full, cs.SharableCPUs(), err)
+		}
+
+		log.Debug("%s: sliceable cpuset is %s", cs.node.Name(), sliceable)
 		if cr.PickByHints() {
-			exclusive, ok = cs.takeCPUsByHints(&cs.sharable, cr)
+			exclusive, ok = cs.takeCPUsByHints(&sliceable, cr)
 			if !ok {
-				exclusive, err = cs.takeCPUs(&cs.sharable, nil, full, cr.CPUPrio())
+				exclusive, err = cs.takeCPUs(&sliceable, nil, full, cr.CPUPrio())
 			}
 		} else {
-			exclusive, err = cs.takeCPUs(&cs.sharable, nil, full, cr.CPUPrio())
+			exclusive, err = cs.takeCPUs(&sliceable, nil, full, cr.CPUPrio())
 		}
 		if err != nil {
 			return nil, policyError("internal error: "+
 				"%s: can't take %d exclusive CPUs from %s: %v",
-				cs.node.Name(), full, cs.sharable, err)
+				cs.node.Name(), full, sliceable, err)
 		}
+		cs.sharable = cs.sharable.Difference(exclusive)

 	case full > 0:
 		return nil, policyError("internal error: "+
@@ -673,6 +685,10 @@ func (cs *supply) DumpAllocatable() string {
 			sep = ", "
 		}
 		cpu += sep + fmt.Sprintf("allocatable:%dm)", cs.AllocatableSharedCPU(true))
+
+		sliceable, _ := cs.SliceableCPUs()
+		cpu += fmt.Sprintf("/sliceable:%s (%dm)", kubernetes.ShortCPUSet(sliceable),
+			1000*sliceable.Size())
 	}

 	allocatable := "<" + cs.node.Name() + " allocatable: "
@@ -997,12 +1013,81 @@ func (cs *supply) AllocatableSharedCPU(quiet ...bool) int {
 			shared = pShared
 		}
 	}
+
 	if verbose {
 		log.Debug("%s: ancestor-adjusted free shared CPU: %dm", cs.node.Name(), shared)
 	}
+
+	// If there are BestEffort or zero CPU request Burstable containers in this
+	// node or any of its children, we need to set aside an extra milliCPU worth
+	// of shared capacity for them.
+	//
+	// TODO(klihub): We might need to try speeding this up if it gets too slow.
+	// Obvious optimizations would be to 1) allow {Breadth,Depth}First to stop
+	// early if possible, and 2) store grants per assigned node.
+	hasZeroCpuReqs := false
+	cs.node.BreadthFirst(func(n Node) {
+		if cs.node.Policy().hasZeroCpuReqContainer(n) {
+			hasZeroCpuReqs = true
+		}
+	})
+	if hasZeroCpuReqs {
+		shared--
+		if verbose {
+			log.Debug("%s: 0 CPU req-adjusted free shared CPU: %dm",
+				cs.node.Name(), shared)
+		}
+	}
+
 	return shared
 }

+// SliceableCPUs calculates the shared cpuset we can slice exclusive CPUs off of.
+func (cs *supply) SliceableCPUs() (cpuset.CPUSet, error) {
+	var (
+		sliceable = cpuset.New()
+		errs      []error
+	)
+
+	// We need to avoid slicing any shared pool in child nodes below its
+	// current allocation. To do so, we go through the subtree collecting
+	// CPUs from shared pools for allocation, but leave behind enough full
+	// CPUs in each to cover their current BestEffort and Burstable QoS
+	// class shared allocations.
+
+	cs.node.DepthFirst(func(n Node) {
+		if n.IsSameNode(cs.node) && !n.IsLeafNode() {
+			return
+		}
+
+		ns := n.FreeSupply()
+		if ns == nil {
+			return
+		}
+
+		cpus := ns.SharableCPUs()
+		free := ns.AllocatableSharedCPU(true) / 1000
+
+		// TODO(klihub): Ideally we should also take into account the CPU
+		// priority preference of any ongoing allocation, trying to slice
+		// CPUs with a matching preference. We don't do that ATM.
+
+		cset, err := cs.takeCPUs(&cpus, nil, free, nonePrio)
+		if err != nil {
+			errs = append(errs, err)
+			return
+		}
+
+		sliceable = sliceable.Union(cset)
+	})
+
+	if len(errs) > 0 {
+		return cpuset.New(), errors.Join(errs...)
+	}
+
+	return sliceable, nil
+}
+
 // Eval...
 func (score *score) Eval() float64 {
 	return 1.0
@@ -1199,7 +1284,7 @@ func (cg *grant) String() string {
 		reserved = fmt.Sprintf(", reserved: %s (%dm)",
 			cg.node.FreeSupply().ReservedCPUs(), cg.ReservedPortion())
 	}
-	if cg.SharedPortion() > 0 {
+	if cg.SharedPortion() > 0 || (isol.IsEmpty() && cg.exclusive.IsEmpty()) {
 		shared = fmt.Sprintf(", shared: %s (%dm)",
 			cg.node.FreeSupply().SharableCPUs(), cg.SharedPortion())
 	}
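
To see the intent of `SliceableCPUs` in isolation, here is a minimal standalone sketch. It is not part of this diff: the `pool` type and the `sliceableCPUs` helper are made up for illustration, and the only real dependency assumed is `k8s.io/utils/cpuset`. Each pool contributes only as many whole CPUs as its allocatable shared milliCPU still covers, so CPUs backing existing shared allocations are never sliced off.

```go
// Standalone sketch (not part of this diff). The pool type and the helper
// below are hypothetical; only k8s.io/utils/cpuset is a real dependency.
package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

// pool stands in for a node's shared CPU supply: its shared cpuset and how
// much of it (in milliCPU) is still allocatable.
type pool struct {
	shared      cpuset.CPUSet
	allocatable int // milliCPU
}

// sliceableCPUs collects from each pool only as many whole CPUs as are still
// free, leaving behind enough full CPUs to cover the pool's current shared
// (BestEffort/Burstable) allocations.
func sliceableCPUs(pools []pool) cpuset.CPUSet {
	sliceable := cpuset.New()
	for _, p := range pools {
		free := p.allocatable / 1000 // whole free CPUs in this pool
		cpus := p.shared.List()
		if free > len(cpus) {
			free = len(cpus)
		}
		// The real code picks CPUs by topology, priority, and hints
		// (takeCPUs); here we simply take the lowest-numbered ones.
		sliceable = sliceable.Union(cpuset.New(cpus[:free]...))
	}
	return sliceable
}

func main() {
	pools := []pool{
		{shared: cpuset.New(0, 1, 2, 3), allocatable: 2500}, // 1500m already granted
		{shared: cpuset.New(4, 5, 6, 7), allocatable: 4000}, // fully free
	}
	fmt.Println("sliceable:", sliceableCPUs(pools)) // prints "sliceable: 0-1,4-7"
}
```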
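
The other half of the change, in `AllocateCPU`, carves the requested exclusive CPUs out of that sliceable set and then shrinks the shared pool by the same CPUs. A continuation of the sketch above, with a hypothetical `sliceExclusive` standing in for the policy's `takeCPUs`:

```go
// sliceExclusive carves 'full' exclusive CPUs out of the sliceable set and
// returns the shared pool shrunk by the same CPUs, mirroring
// "cs.sharable = cs.sharable.Difference(exclusive)" in the diff.
// Hypothetical helper; error handling is simplified.
func sliceExclusive(shared, sliceable cpuset.CPUSet, full int) (exclusive, remaining cpuset.CPUSet, err error) {
	if sliceable.Size() < full {
		return cpuset.New(), shared,
			fmt.Errorf("can't take %d exclusive CPUs, only %d sliceable", full, sliceable.Size())
	}
	exclusive = cpuset.New(sliceable.List()[:full]...)
	remaining = shared.Difference(exclusive)
	return exclusive, remaining, nil
}
```

With shared CPUs 0-7 and the sliceable set 0-1,4-7 from the example above, a request for 3 exclusive CPUs yields exclusive 0-1,4 and leaves 2-3,5-7 in the shared pool.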