Skip to content

Commit 81bf1f8

Browse files
authored
Merge pull request kubernetes#90980 from AlexeyPerevalov/GetNUMANodeInfo
Avoid using socket for hints in generateCPUTopologyHints
2 parents 5f79e91 + a047e8a commit 81bf1f8

File tree

13 files changed

+72
-171
lines changed

13 files changed

+72
-171
lines changed

pkg/kubelet/cm/BUILD

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ go_library(
5656
"//pkg/apis/core/v1/helper/qos:go_default_library",
5757
"//pkg/kubelet/cadvisor:go_default_library",
5858
"//pkg/kubelet/cm/containermap:go_default_library",
59-
"//pkg/kubelet/cm/cpumanager/topology:go_default_library",
6059
"//pkg/kubelet/cm/devicemanager:go_default_library",
6160
"//pkg/kubelet/cm/util:go_default_library",
6261
"//pkg/kubelet/events:go_default_library",
@@ -118,7 +117,6 @@ go_library(
118117
"//pkg/apis/core/v1/helper/qos:go_default_library",
119118
"//pkg/kubelet/cadvisor:go_default_library",
120119
"//pkg/kubelet/cm/containermap:go_default_library",
121-
"//pkg/kubelet/cm/cpumanager/topology:go_default_library",
122120
"//pkg/kubelet/cm/devicemanager:go_default_library",
123121
"//pkg/kubelet/cm/util:go_default_library",
124122
"//pkg/kubelet/events:go_default_library",

pkg/kubelet/cm/container_manager_linux.go

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ import (
5252
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
5353
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
5454
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
55-
cputopology "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
5655
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
5756
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
5857
cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
@@ -238,13 +237,6 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
238237
if err != nil {
239238
return nil, err
240239
}
241-
// Correct NUMA information is currently missing from cadvisor's
242-
// MachineInfo struct, so we use the CPUManager's internal logic for
243-
// gathering NUMANodeInfo to pass to components that care about it.
244-
numaNodeInfo, err := cputopology.GetNUMANodeInfo()
245-
if err != nil {
246-
return nil, err
247-
}
248240
capacity := cadvisor.CapacityFromMachineInfo(machineInfo)
249241
for k, v := range capacity {
250242
internalCapacity[k] = v
@@ -300,7 +292,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
300292

301293
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManager) {
302294
cm.topologyManager, err = topologymanager.NewManager(
303-
numaNodeInfo,
295+
machineInfo.Topology,
304296
nodeConfig.ExperimentalTopologyManagerPolicy,
305297
)
306298

@@ -315,7 +307,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
315307

316308
klog.Infof("Creating device plugin manager: %t", devicePluginEnabled)
317309
if devicePluginEnabled {
318-
cm.deviceManager, err = devicemanager.NewManagerImpl(numaNodeInfo, cm.topologyManager)
310+
cm.deviceManager, err = devicemanager.NewManagerImpl(machineInfo.Topology, cm.topologyManager)
319311
cm.topologyManager.AddHintProvider(cm.deviceManager)
320312
} else {
321313
cm.deviceManager, err = devicemanager.NewManagerStub()
@@ -330,7 +322,6 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
330322
nodeConfig.ExperimentalCPUManagerPolicy,
331323
nodeConfig.ExperimentalCPUManagerReconcilePeriod,
332324
machineInfo,
333-
numaNodeInfo,
334325
nodeConfig.NodeAllocatableConfig.ReservedSystemCPUs,
335326
cm.GetNodeAllocatableReservation(),
336327
nodeConfig.KubeletRootDir,

pkg/kubelet/cm/cpumanager/cpu_manager.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func (s *sourcesReadyStub) AddSource(source string) {}
126126
func (s *sourcesReadyStub) AllReady() bool { return true }
127127

128128
// NewManager creates new cpu manager based on provided policy
129-
func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, numaNodeInfo topology.NUMANodeInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
129+
func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
130130
var topo *topology.CPUTopology
131131
var policy Policy
132132

@@ -137,7 +137,7 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
137137

138138
case PolicyStatic:
139139
var err error
140-
topo, err = topology.Discover(machineInfo, numaNodeInfo)
140+
topo, err = topology.Discover(machineInfo)
141141
if err != nil {
142142
return nil, err
143143
}

pkg/kubelet/cm/cpumanager/cpu_manager_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,7 @@ func TestCPUManagerGenerate(t *testing.T) {
629629
}
630630
defer os.RemoveAll(sDir)
631631

632-
mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, nil, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
632+
mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
633633
if testCase.expectedError != nil {
634634
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
635635
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())

pkg/kubelet/cm/cpumanager/policy_static.go

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -364,24 +364,18 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
364364
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
365365
// Initialize minAffinitySize to include all NUMA Nodes.
366366
minAffinitySize := p.topology.CPUDetails.NUMANodes().Size()
367-
// Initialize minSocketsOnMinAffinity to include all Sockets.
368-
minSocketsOnMinAffinity := p.topology.CPUDetails.Sockets().Size()
369367

370-
// Iterate through all combinations of socket bitmask and build hints from them.
368+
// Iterate through all combinations of numa nodes bitmask and build hints from them.
371369
hints := []topologymanager.TopologyHint{}
372370
bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().ToSlice(), func(mask bitmask.BitMask) {
373-
// First, update minAffinitySize and minSocketsOnMinAffinity for the
374-
// current request size.
371+
// First, update minAffinitySize for the current request size.
375372
cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size()
376-
socketsInMask := p.topology.CPUDetails.SocketsInNUMANodes(mask.GetBits()...).Size()
377373
if cpusInMask >= request && mask.Count() < minAffinitySize {
378374
minAffinitySize = mask.Count()
379-
if socketsInMask < minSocketsOnMinAffinity {
380-
minSocketsOnMinAffinity = socketsInMask
381-
}
382375
}
383376

384-
// Then check to see if all of the reusable CPUs are part of the bitmask.
377+
// Then check to see if we have enough CPUs available on the current
378+
// numa node bitmask to satisfy the CPU request.
385379
numMatching := 0
386380
for _, c := range reusableCPUs.ToSlice() {
387381
// Disregard this mask if its NUMANode isn't part of it.
@@ -404,7 +398,7 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
404398
return
405399
}
406400

407-
// Otherwise, create a new hint from the socket bitmask and add it to the
401+
// Otherwise, create a new hint from the numa node bitmask and add it to the
408402
// list of hints. We set all hint preferences to 'false' on the first
409403
// pass through.
410404
hints = append(hints, topologymanager.TopologyHint{
@@ -416,14 +410,10 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
416410
// Loop back through all hints and update the 'Preferred' field based on
417411
// counting the number of bits sets in the affinity mask and comparing it
418412
// to the minAffinitySize. Only those with an equal number of bits set (and
419-
// with a minimal set of sockets) will be considered preferred.
413+
// with a minimal set of numa nodes) will be considered preferred.
420414
for i := range hints {
421415
if hints[i].NUMANodeAffinity.Count() == minAffinitySize {
422-
nodes := hints[i].NUMANodeAffinity.GetBits()
423-
numSockets := p.topology.CPUDetails.SocketsInNUMANodes(nodes...).Size()
424-
if numSockets == minSocketsOnMinAffinity {
425-
hints[i].Preferred = true
426-
}
416+
hints[i].Preferred = true
427417
}
428418
}
429419

pkg/kubelet/cm/cpumanager/topology/BUILD

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,5 @@ go_test(
3333
name = "go_default_test",
3434
srcs = ["topology_test.go"],
3535
embed = [":go_default_library"],
36-
deps = [
37-
"//pkg/kubelet/cm/cpuset:go_default_library",
38-
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
39-
],
36+
deps = ["//vendor/github.com/google/cadvisor/info/v1:go_default_library"],
4037
)

pkg/kubelet/cm/cpumanager/topology/topology.go

Lines changed: 7 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ package topology
1818

1919
import (
2020
"fmt"
21-
"io/ioutil"
22-
"strings"
2321

2422
cadvisorapi "github.com/google/cadvisor/info/v1"
2523
"k8s.io/klog/v2"
@@ -218,34 +216,28 @@ func (d CPUDetails) CPUsInCores(ids ...int) cpuset.CPUSet {
218216
}
219217

220218
// Discover returns CPUTopology based on cadvisor node info
221-
func Discover(machineInfo *cadvisorapi.MachineInfo, numaNodeInfo NUMANodeInfo) (*CPUTopology, error) {
219+
func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
222220
if machineInfo.NumCores == 0 {
223221
return nil, fmt.Errorf("could not detect number of cpus")
224222
}
225223

226224
CPUDetails := CPUDetails{}
227225
numPhysicalCores := 0
228226

229-
for _, socket := range machineInfo.Topology {
230-
numPhysicalCores += len(socket.Cores)
231-
for _, core := range socket.Cores {
227+
for _, node := range machineInfo.Topology {
228+
numPhysicalCores += len(node.Cores)
229+
for _, core := range node.Cores {
232230
if coreID, err := getUniqueCoreID(core.Threads); err == nil {
233231
for _, cpu := range core.Threads {
234-
numaNodeID := 0
235-
for id, cset := range numaNodeInfo {
236-
if cset.Contains(cpu) {
237-
numaNodeID = id
238-
}
239-
}
240232
CPUDetails[cpu] = CPUInfo{
241233
CoreID: coreID,
242-
SocketID: socket.Id,
243-
NUMANodeID: numaNodeID,
234+
SocketID: core.SocketID,
235+
NUMANodeID: node.Id,
244236
}
245237
}
246238
} else {
247239
klog.Errorf("could not get unique coreID for socket: %d core %d threads: %v",
248-
socket.Id, core.Id, core.Threads)
240+
core.SocketID, core.Id, core.Threads)
249241
return nil, err
250242
}
251243
}
@@ -280,49 +272,3 @@ func getUniqueCoreID(threads []int) (coreID int, err error) {
280272

281273
return min, nil
282274
}
283-
284-
// GetNUMANodeInfo uses sysfs to return a map of NUMANode id to the list of
285-
// CPUs associated with that NUMANode.
286-
//
287-
// TODO: This is a temporary workaround until cadvisor provides this
288-
// information directly in machineInfo. We should remove this once this
289-
// information is available from cadvisor.
290-
func GetNUMANodeInfo() (NUMANodeInfo, error) {
291-
// Get the possible NUMA nodes on this machine. If reading this file
292-
// is not possible, this is not an error. Instead, we just return a
293-
// nil NUMANodeInfo, indicating that no NUMA information is available
294-
// on this machine. This should implicitly be interpreted as having a
295-
// single NUMA node with id 0 for all CPUs.
296-
nodelist, err := ioutil.ReadFile("/sys/devices/system/node/online")
297-
if err != nil {
298-
return nil, nil
299-
}
300-
301-
// Parse the nodelist into a set of Node IDs
302-
nodes, err := cpuset.Parse(strings.TrimSpace(string(nodelist)))
303-
if err != nil {
304-
return nil, err
305-
}
306-
307-
info := make(NUMANodeInfo)
308-
309-
// For each node...
310-
for _, node := range nodes.ToSlice() {
311-
// Read the 'cpulist' of the NUMA node from sysfs.
312-
path := fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", node)
313-
cpulist, err := ioutil.ReadFile(path)
314-
if err != nil {
315-
return nil, err
316-
}
317-
318-
// Convert the 'cpulist' into a set of CPUs.
319-
cpus, err := cpuset.Parse(strings.TrimSpace(string(cpulist)))
320-
if err != nil {
321-
return nil, err
322-
}
323-
324-
info[node] = cpus
325-
}
326-
327-
return info, nil
328-
}

0 commit comments

Comments
 (0)