Skip to content

Commit 21162d2

Browse files
committed
refactor: get topology
Signed-off-by: thxCode <[email protected]>
1 parent 70f2814 commit 21162d2

File tree

7 files changed

+385
-241
lines changed

7 files changed

+385
-241
lines changed

gpustack_runtime/cmds/detector.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,20 @@ def run(self):
117117
for topo in topologies:
118118
print(format_topology_table(topo))
119119

120+
# Legend
121+
legend_lines = [
122+
"",
123+
"Legend (from nearest to farthest):",
124+
" X = Self",
125+
" LINK = Connection traversing with High-Speed Link (e.g., AMD XGMI, Ascend HCCS, NVIDIA NVLink)",
126+
" PIX = Connection traversing at most a single PCIe bridge",
127+
" PXB = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)",
128+
" PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)",
129+
" NODE = Connection traversing PCIe and the interconnect between NUMA nodes",
130+
" SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)",
131+
]
132+
print(os.linesep.join(legend_lines))
133+
120134

121135
def format_devices_json(devs: Devices) -> str:
    """
    Serialize the given devices as a JSON array.

    Args:
        devs:
            The devices to serialize; each item is converted with its `to_dict()`.

    Returns:
        A JSON string indented by two spaces.

    """
    serialized = [dev.to_dict() for dev in devs]
    return json.dumps(serialized, indent=2)
@@ -259,21 +273,8 @@ def format_topology_table(topo: Topology) -> str:
259273
"+" + "-" * (width - 2) + "+",
260274
]
261275

262-
# Legend
263-
legend_lines = [
264-
"",
265-
"Legend (from nearest to farthest):",
266-
" X = Self",
267-
" LINK = Connection traversing with High-Speed Link (e.g., NVIDIA NVLink, Ascend HCCS)",
268-
" PIX = Connection traversing at most a single PCIe bridge",
269-
" PXB = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)",
270-
" PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)",
271-
" NODE = Connection traversing PCIe and the interconnect between NUMA nodes",
272-
" SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)",
273-
]
274-
275276
# Combine all parts
276-
return os.linesep.join(header_lines + topology_lines + footer_lines + legend_lines)
277+
return os.linesep.join(header_lines + topology_lines + footer_lines)
277278

278279

279280
def format_topologies_json(topologies: list[Topology]) -> str:

gpustack_runtime/detector/__types__.py

Lines changed: 71 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -251,21 +251,6 @@ class Topology:
251251
The value at index i represents the Memory set for device i.
252252
"""
253253

254-
@staticmethod
255-
def stringify_devices_distance(distance: int) -> str:
256-
"""
257-
Stringify the devices distance to a human-readable format.
258-
259-
Args:
260-
distance:
261-
The distance between two devices.
262-
263-
Returns:
264-
A string representing the distance.
265-
266-
"""
267-
return str(distance)
268-
269254
def __init__(
270255
self,
271256
manufacturer: ManufacturerEnum,
@@ -298,7 +283,7 @@ def stringify(self) -> list[list[str]]:
298283
devices_info: list[list[str]] = [[]] * devices_count
299284
for i in range(devices_count):
300285
devices_info[i] = [
301-
self.stringify_devices_distance(d) for d in self.devices_distances[i]
286+
stringify_devices_distance(d) for d in self.devices_distances[i]
302287
]
303288
devices_info[i] += [
304289
self.devices_cpu_affinities[i]
@@ -313,6 +298,76 @@ def stringify(self) -> list[list[str]]:
313298
return devices_info
314299

315300

301+
class TopologyDistanceEnum(int, Enum):
    """
    Distance levels between two devices in a topology.

    Members are integers, ordered from nearest (smallest value)
    to farthest (largest value).
    """

    INTERNAL = 0
    """
    The device itself.
    """
    LINK = 5
    """
    Connected through a High-Speed Link (e.g., AMD XGMI, Ascend HCCS, NVIDIA NVLink).
    """
    PIX = 10
    """
    Connected through at most a single PCIe bridge.
    """
    PXB = 20
    """
    Connected through multiple PCIe bridges (without traversing the PCIe Host Bridge).
    """
    PHB = 30
    """
    Connected through PCIe as well as a PCIe Host Bridge (typically the CPU).
    """
    NODE = 40
    """
    Connected through PCIe and the interconnect between NUMA nodes.
    """
    SYS = 50
    """
    Connected through PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI).
    """
    UNKNOWN = 100
    """
    The connection type is unknown.
    """
338+
339+
340+
def stringify_devices_distance(distance: int) -> str:
    """
    Render a devices distance level as its short topology label.

    Args:
        distance:
            The distance between two devices.

    Returns:
        The label of the distance level ("X", "LINK", "PIX", "PXB",
        "PHB", "NODE" or "SYS"), or "N/A" for any other value.

    """
    labels = {
        0: "X",
        5: "LINK",
        10: "PIX",
        20: "PXB",
        30: "PHB",
        40: "NODE",
        50: "SYS",
    }
    # Any level without a label (e.g. an unknown connection) falls back to "N/A".
    return labels.get(distance, "N/A")
369+
370+
316371
def reduce_devices_distances(
317372
devices_distances: list[list[int]],
318373
) -> dict[int, list[int]]:

gpustack_runtime/detector/__utils__.py

Lines changed: 78 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,28 @@ def get_cpu_numa_node_mapping() -> list[int | None]:
673673
return mapping
674674

675675

676+
@lru_cache(maxsize=1)
def get_numa_node_cpu_mapping() -> dict[int, list[int]]:
    """
    Group CPU cores by the NUMA node they belong to.

    This inverts the result of `get_cpu_numa_node_mapping()`:
    the position of each entry there is taken as the CPU core index,
    and entries without a NUMA node (None) are left out.

    The result is cached, so every caller receives the same dictionary object.

    Returns:
        A dictionary whose keys are NUMA node numbers and whose values are
        the CPU core indices of that node, in ascending order.

    """
    cpus_by_node: dict[int, list[int]] = {}
    for core, node in enumerate(get_cpu_numa_node_mapping()):
        if node is None:
            continue
        cpus_by_node.setdefault(node, []).append(core)
    return cpus_by_node
696+
697+
676698
@lru_cache
677699
def map_cpu_affinity_to_numa_node(cpu_affinity: int | str | None) -> str:
678700
"""
@@ -722,6 +744,53 @@ def map_cpu_affinity_to_numa_node(cpu_affinity: int | str | None) -> str:
722744
return list_to_range_str(sorted(numa_nodes))
723745

724746

747+
@lru_cache
def map_numa_node_to_cpu_affinity(numa_node: int | str | None) -> str:
    """
    Map NUMA nodes to CPU affinity.

    Args:
        numa_node:
            The NUMA nodes as an integer bitmask or a string (e.g., "0-1,3").
            Parts of the string that cannot be parsed are skipped.

    Returns:
        A comma-separated string of CPU core indices, with consecutive
        indices collapsed into ranges,
        or blank string if no CPU cores are found.

    """
    if numa_node is None:
        return ""

    numa_indices: list[int] = []
    if isinstance(numa_node, int):
        numa_indices = bits_to_list(numa_node)
    else:
        for part in numa_node.split(","):
            # Split on the first dash only: a part with several dashes
            # (e.g. "1-2-3") must be skipped like any other malformed part
            # rather than raise on unpacking.
            # NOTE(review): relies on safe_int returning the given default
            # for text it cannot parse - confirm against its definition.
            lo, dash, hi = part.partition("-")
            if not dash:
                idx = safe_int(part, -1)
                if idx != -1:
                    numa_indices.append(idx)
                continue
            lo_idx = safe_int(lo, -1)
            hi_idx = safe_int(hi, -1)
            if lo_idx == -1 or hi_idx == -1 or lo_idx > hi_idx:
                continue
            numa_indices.extend(range(lo_idx, hi_idx + 1))

    numa_cpu_mapping = get_numa_node_cpu_mapping()

    # Collect into a set so that a core is listed once
    # even if its NUMA node is named several times.
    cpu_cores: set[int] = set()
    for numa_idx in numa_indices:
        cpu_cores.update(numa_cpu_mapping.get(numa_idx, ()))
    if not cpu_cores:
        return ""

    # list_to_range_str expects its input sorted in ascending order.
    return list_to_range_str(sorted(cpu_cores))
792+
793+
725794
def bits_to_list(bits: int, offset: int = 0) -> list[int]:
726795
"""
727796
Convert a bitmask to a list of set bit indices.
@@ -765,20 +834,18 @@ def bits_to_str(bits: int, offset: int = 0, prefix: str = "") -> str:
765834
if bits_list:
766835
if bits_str:
767836
bits_str += ","
768-
bits_str += list_to_range_str(bits_list, is_sorted=True)
837+
bits_str += list_to_range_str(bits_list)
769838

770839
return bits_str
771840

772841

773-
def list_to_range_str(indices: list[int], is_sorted: bool = False) -> str:
842+
def list_to_range_str(indices: list[int]) -> str:
774843
"""
775844
Convert a list of indices to a comma-separated string with ranges.
776845
777846
Args:
778847
indices:
779-
The list of indices.
780-
is_sorted:
781-
The indicates whether the input list is already sorted.
848+
The list of indices, must be sorted in ascending order.
782849
783850
Returns:
784851
A comma-separated string with ranges (e.g., "0,2-4,6").
@@ -787,17 +854,14 @@ def list_to_range_str(indices: list[int], is_sorted: bool = False) -> str:
787854
if not indices:
788855
return ""
789856

790-
sorted_indices = indices
791-
if not is_sorted:
792-
sorted_indices = sorted(set(indices))
793-
if len(sorted_indices) == 1:
794-
return f"{sorted_indices[0]}"
795-
if len(sorted_indices) == (sorted_indices[-1] - sorted_indices[0] + 1):
796-
return f"{sorted_indices[0]}-{sorted_indices[-1]}"
857+
if len(indices) == 1:
858+
return f"{indices[0]}"
859+
if len(indices) == (indices[-1] - indices[0] + 1):
860+
return f"{indices[0]}-{indices[-1]}"
797861

798-
start, end = sorted_indices[0], sorted_indices[0]
862+
start, end = indices[0], indices[0]
799863
ranges: list[tuple[int, int]] = []
800-
for i in sorted_indices[1:]:
864+
for i in indices[1:]:
801865
if i == end + 1:
802866
end = i
803867
else:

0 commit comments

Comments
 (0)