Skip to content

Commit 62955d9

Browse files
committed
refactor: get topology
Signed-off-by: thxCode <[email protected]>
1 parent a50d774 commit 62955d9

File tree

10 files changed

+532
-104
lines changed

10 files changed

+532
-104
lines changed

gpustack_runtime/detector/__utils__.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ class PCIDevice:
2424
"""
2525
Path to the PCI device in sysfs.
2626
"""
27+
root: str
28+
"""
29+
Root of the PCI device.
30+
"""
31+
switches: list[str]
32+
"""
33+
Switches of the PCI device.
34+
"""
2735
address: str
2836
"""
2937
Address of the PCI device.
@@ -64,7 +72,7 @@ def get_pci_devices(
6472
vendor = [vendor]
6573

6674
for dev_path in sysfs_pci_path.iterdir():
67-
dev_address = dev_path.name
75+
dev_address = dev_path.name.lower()
6876
if address and dev_address not in address:
6977
continue
7078

@@ -92,10 +100,19 @@ def get_pci_devices(
92100
if dev_class is None or dev_config is None:
93101
continue
94102

103+
dev_path_resolved = dev_path.resolve()
104+
dev_switches = []
105+
dev_root = dev_path_resolved.parent
106+
while (dev_root != sysfs_pci_path) and (dev_root.name.count(":") == 2):
107+
dev_switches.append(dev_root.name)
108+
dev_root = dev_root.parent
109+
95110
pci_devices.append(
96111
PCIDevice(
97112
vendor=dev_vendor,
98-
path=str(dev_path),
113+
path=str(dev_path_resolved),
114+
root=dev_root.name,
115+
switches=dev_switches,
99116
address=dev_address,
100117
class_=dev_class,
101118
config=dev_config,
@@ -105,6 +122,40 @@ def get_pci_devices(
105122
return pci_devices
106123

107124

125+
def compare_pci_devices(
    dev_a: PCIDevice | None,
    dev_b: PCIDevice | None,
) -> int:
    """
    Compare the PCI topology placement of two devices.

    Only the `root` and `switches` attributes of each device are inspected.

    Args:
        dev_a:
            The first PCI device, or None if it is unknown.
        dev_b:
            The second PCI device, or None if it is unknown.

    Returns:
        -1 if either device is None or the devices have different roots,
        0 if devices have the same root but different switches,
        1 if devices have the same root and same switches.

    """
    # A missing device cannot be placed in the topology at all,
    # so it is reported the same way as "different roots".
    if dev_a is None or dev_b is None:
        return -1
    if dev_a.root != dev_b.root:
        return -1
    # List equality already checks both the length and the element-wise
    # order of the switch chains, so no manual zip loop is required.
    if dev_a.switches == dev_b.switches:
        return 1
    return 0
157+
158+
108159
@dataclass
109160
class DeviceFile:
110161
path: str

gpustack_runtime/detector/amd.py

Lines changed: 64 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from .__utils__ import (
1212
PCIDevice,
1313
byte_to_mebibyte,
14+
compare_pci_devices,
1415
get_brief_version,
1516
get_numa_node_by_bdf,
1617
get_pci_devices,
@@ -217,7 +218,7 @@ def detect(self) -> Devices | None:
217218

218219
with contextlib.suppress(pyamdsmi.AmdSmiException):
219220
dev_bdf = pyamdsmi.amdsmi_get_gpu_device_bdf(dev)
220-
dev_appendix["bdf"] = str(dev_bdf).lower()
221+
dev_appendix["bdf"] = dev_bdf
221222

222223
ret.append(
223224
Device(
@@ -287,20 +288,48 @@ def get_device_handle(dev: Device):
287288
return devs_mapping.get(dev.index)
288289

289290
try:
291+
pci_devices = self.detect_pci_devices()
292+
293+
def distance_pci_devices(bdf_a: str, bdf_b: str) -> TopologyDistanceEnum:
294+
"""
295+
Compute distance between two PCI devices by their BDFs.
296+
297+
Args:
298+
bdf_a:
299+
The BDF of the first PCI device.
300+
bdf_b:
301+
The BDF of the second PCI device.
302+
303+
Returns:
304+
The TopologyDistanceEnum representing the distance.
305+
306+
"""
307+
pcid_a = pci_devices.get(bdf_a, None)
308+
pcid_b = pci_devices.get(bdf_b, None)
309+
310+
score = compare_pci_devices(pcid_a, pcid_b)
311+
if score > 0:
312+
return TopologyDistanceEnum.PIX
313+
if score == 0:
314+
return TopologyDistanceEnum.PXB
315+
return TopologyDistanceEnum.PHB
316+
290317
pyamdsmi.amdsmi_init()
291318

319+
# Get NUMA and CPU affinities.
292320
for i, dev_i in enumerate(devices):
293-
dev_i_handle = get_device_handle(dev_i)
294-
295321
# Get affinity with PCIe BDF if possible.
296322
if dev_i_bdf := dev_i.appendix.get("bdf", ""):
297-
numa_node = get_numa_node_by_bdf(dev_i_bdf)
298-
topology.devices_numa_affinities[i] = numa_node
323+
topology.devices_numa_affinities[i] = get_numa_node_by_bdf(
324+
dev_i_bdf,
325+
)
299326
topology.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
300-
numa_node,
327+
topology.devices_numa_affinities[i],
301328
)
302329
# Otherwise, get affinity via AMD SMI.
303330
if not topology.devices_cpu_affinities[i]:
331+
dev_i_handle = get_device_handle(dev_i)
332+
304333
# Get NUMA affinity.
305334
try:
306335
dev_i_numa_node = pyamdsmi.amdsmi_topo_get_numa_node_number(
@@ -315,41 +344,50 @@ def get_device_handle(dev: Device):
315344
)
316345
# Get CPU affinity.
317346
topology.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
318-
numa_node=topology.devices_numa_affinities[i],
347+
topology.devices_numa_affinities[i],
319348
)
320349

321-
# Get distances to other devices.
350+
# Get distances to other devices.
351+
for i, dev_i in enumerate(devices):
352+
dev_i_handle = get_device_handle(dev_i)
353+
322354
for j, dev_j in enumerate(devices):
323-
if i == j:
324-
continue
325-
if topology.devices_distances[i][j] != 0:
355+
if (
356+
dev_i.index == dev_j.index
357+
or topology.devices_distances[i][j] != 0
358+
):
326359
continue
327360

328361
dev_j_handle = get_device_handle(dev_j)
329362

330363
distance = TopologyDistanceEnum.UNK
331364
try:
332-
link_type = pyamdsmi.amdsmi_topo_get_link_type(
365+
link = pyamdsmi.amdsmi_topo_get_link_type(
333366
dev_i_handle,
334367
dev_j_handle,
335368
)
336-
match int(link_type.type):
369+
link_type = link.get("type", -1)
370+
link_hops = link.get("hops", -1)
371+
match link_type:
337372
case pyamdsmi.AMDSMI_LINK_TYPE_INTERNAL:
338373
distance = TopologyDistanceEnum.SELF
339-
case pyamdsmi.AMDSMI_LINK_TYPE_XGMI:
340-
distance = TopologyDistanceEnum.LINK
341-
# For PCIe links,
342-
# further distinguish between PHB and SYS based on NUMA affinity.
343374
case pyamdsmi.AMDSMI_LINK_TYPE_PCIE:
344-
if link_type == pyamdsmi.AMDSMI_LINK_TYPE_PCIE:
345-
dev_i_numa, dev_j_numa = (
346-
topology.devices_numa_affinities[i],
347-
topology.devices_numa_affinities[j],
375+
dev_i_numa, dev_j_numa = (
376+
topology.devices_numa_affinities[i],
377+
topology.devices_numa_affinities[j],
378+
)
379+
if dev_i_numa and dev_i_numa == dev_j_numa:
380+
distance = distance_pci_devices(
381+
dev_i.appendix.get("bdf", ""),
382+
dev_j.appendix.get("bdf", ""),
348383
)
349-
if dev_i_numa != "" and dev_i_numa == dev_j_numa:
350-
distance = TopologyDistanceEnum.PHB
351-
else:
352-
distance = TopologyDistanceEnum.SYS
384+
else:
385+
distance = TopologyDistanceEnum.SYS
386+
case pyamdsmi.AMDSMI_LINK_TYPE_XGMI:
387+
distance = TopologyDistanceEnum.LINK
388+
case _:
389+
if link_hops == 0:
390+
distance = TopologyDistanceEnum.SELF
353391
except pyamdsmi.AmdSmiException:
354392
debug_log_exception(
355393
logger,
@@ -361,7 +399,7 @@ def get_device_handle(dev: Device):
361399
topology.devices_distances[i][j] = distance
362400
topology.devices_distances[j][i] = distance
363401
except pyamdsmi.AmdSmiException:
364-
debug_log_exception(logger, "Failed to get topology")
402+
debug_log_exception(logger, "Failed to fetch topology")
365403
raise
366404
except Exception:
367405
debug_log_exception(logger, "Failed to process topology fetching")

gpustack_runtime/detector/ascend.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def detect(self) -> Devices | None:
213213
dev_card_id,
214214
dev_device_id,
215215
)
216-
dev_appendix["bdf"] = str(dev_bdf).lower()
216+
dev_appendix["bdf"] = dev_bdf
217217

218218
ret.append(
219219
Device(
@@ -276,10 +276,11 @@ def get_topology(self, devices: Devices | None = None) -> Topology | None:
276276

277277
# Get affinity with PCIe BDF if possible.
278278
if dev_i_bdf := dev_i.appendix.get("bdf", ""):
279-
numa_node = get_numa_node_by_bdf(dev_i_bdf)
280-
topology.devices_numa_affinities[i] = numa_node
279+
topology.devices_numa_affinities[i] = get_numa_node_by_bdf(
280+
dev_i_bdf,
281+
)
281282
topology.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
282-
numa_node,
283+
topology.devices_numa_affinities[i],
283284
)
284285
# Otherwise, get affinity via DCMI.
285286
if not topology.devices_cpu_affinities[i]:
@@ -298,14 +299,15 @@ def get_topology(self, devices: Devices | None = None) -> Topology | None:
298299
)
299300
# Get NUMA affinity.
300301
topology.devices_numa_affinities[i] = map_cpu_affinity_to_numa_node(
301-
cpu_affinity=topology.devices_cpu_affinities[i],
302+
topology.devices_cpu_affinities[i],
302303
)
303304

304305
# Get distances to other devices.
305306
for j, dev_j in enumerate(devices):
306-
if i == j:
307-
continue
308-
if topology.devices_distances[i][j] != 0:
307+
if (
308+
dev_i.index == dev_j.index
309+
or topology.devices_distances[i][j] != 0
310+
):
309311
continue
310312

311313
dev_j_card_id = dev_j.appendix["card_id"]

0 commit comments

Comments
 (0)