Skip to content

Commit 9ffa7c7

Browse files
committed
refactor: change indexes to index
Signed-off-by: thxCode <thxcode0824@gmail.com>
1 parent c67764a commit 9ffa7c7

File tree

7 files changed

+104
-48
lines changed

7 files changed

+104
-48
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
::: gpustack_runtime.deployer

gpustack_runtime/cmds/detector.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,7 @@ def format_devices_table(devs: Devices) -> str:
115115
device_lines = []
116116
for dev in devs:
117117
row_data = [
118-
str(
119-
dev.indexes[0]
120-
if dev.indexes and len(dev.indexes) == 1
121-
else ", ".join(str(i) for i in dev.indexes),
122-
),
118+
dev.index,
123119
dev.name if dev.name else "N/A",
124120
f"{dev.memory_used}MiB / {dev.memory}MiB"
125121
if dev.memory and dev.memory_used

gpustack_runtime/detector/__types__.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
from abc import ABC, abstractmethod
4-
from dataclasses import dataclass, field
4+
from dataclasses import dataclass
55
from enum import Enum
66
from typing import Any
77

@@ -133,12 +133,13 @@ class Device:
133133
"""
134134
Manufacturer of the device.
135135
"""
136-
indexes: list[int] = field(default_factory=list)
136+
index: int = 0
137137
"""
138-
Indexes of the device.
139-
For most devices, this field usually contains only one index.
140-
However, some devices use the chip on the device as the actual device,
141-
so this field may contain multiple indexes.
138+
Index of the device.
139+
If GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY is set to 1,
140+
this will be the physical index of the device.
141+
Otherwise, it will be the logical index of the device.
142+
The physical index only applies to non-virtualized devices.
142143
"""
143144
name: str = ""
144145
"""

gpustack_runtime/detector/__utils__.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import re
34
from dataclasses import dataclass
45
from pathlib import Path
56

@@ -87,3 +88,64 @@ def get_pci_devices(
8788
)
8889

8990
return pci_devices
91+
92+
93+
@dataclass
class DeviceFile:
    """
    A device file discovered by get_device_files.
    """

    path: Path
    """
    Path to the device file.
    """
    number: int | None = None
    """
    Number of the device file,
    None if the number could not be parsed from the file name.
    """


def get_device_files(pattern: str, directory: Path = Path("/dev")) -> list[DeviceFile]:
    r"""
    Get device files with the given pattern.

    Args:
        pattern:
            Pattern of the device files to search for.
            Pattern must include a regex group for the number,
            e.g. nvidia(?P<number>\d+).
            The pattern must match the whole file name.
        directory:
            Directory to search for device files,
            e.g. /dev.

    Returns:
        List of DeviceFile objects,
        sorted by number in ascending order (files without a number last),
        ties are broken by file name.
        Empty if the directory does not exist or is not a directory.

    Raises:
        ValueError:
            If the pattern does not include the named group "number".

    """
    if "(?P<number>" not in pattern:
        msg = "Pattern must include a regex group for the number, e.g nvidia(?P<number>\\d+)."
        raise ValueError(msg)

    # A missing path or a non-directory path has no entries to list.
    if not directory.is_dir():
        return []

    # Match against the entry name only: interpolating the directory into the
    # regex breaks on directories containing regex metacharacters, on the root
    # directory ("//name"), and on patterns with top-level alternation.
    regex = re.compile(pattern)

    device_files = []
    for path in directory.iterdir():
        matched = regex.fullmatch(path.name)
        if not matched:
            continue
        try:
            number = int(matched.group("number"))
        except (TypeError, ValueError):
            # TypeError: the group is optional and did not participate (None).
            # ValueError: the group captured something that is not an integer.
            number = None
        device_files.append(
            DeviceFile(
                path=path,
                number=number,
            ),
        )

    # Sort by number in ascending order, None values at the end.
    # The file name is the tiebreaker, since iterdir() order is arbitrary.
    return sorted(
        device_files,
        key=lambda df: (
            df.number is None,
            df.number if df.number is not None else 0,
            df.path.name,
        ),
    )

gpustack_runtime/detector/ascend.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -81,15 +81,7 @@ def detect(self) -> Devices | None:
8181
for dev_card_id in card_list:
8282
device_num_in_card = pydcmi.dcmi_get_device_num_in_card(dev_card_id)
8383
for dev_device_id in range(device_num_in_card):
84-
dev_index = pydcmi.dcmi_get_device_logic_id(
85-
dev_card_id,
86-
dev_device_id,
87-
)
88-
dev_uuid = pydcmi.dcmi_get_device_die_v2(
89-
dev_card_id,
90-
dev_device_id,
91-
pydcmi.DCMI_DIE_TYPE_VDIE,
92-
)
84+
dev_is_vgpu = False
9385
dev_virt_info = _get_device_virtual_info(
9486
dev_card_id,
9587
dev_device_id,
@@ -99,13 +91,15 @@ def detect(self) -> Devices | None:
9991
and hasattr(dev_virt_info, "query_info")
10092
and hasattr(dev_virt_info.query_info, "computing")
10193
):
94+
dev_is_vgpu = True
10295
dev_cores_aicore = dev_virt_info.query_info.computing.aic
103-
dev_name = f"Ascend VDC {dev_virt_info.vdev_id}"
96+
dev_name = f"Ascend {dev_virt_info.query_info.name}"
10497
dev_mem, dev_mem_used = 0, 0
10598
if hasattr(dev_virt_info.query_info.computing, "memory_size"):
10699
dev_mem = (
107100
dev_virt_info.query_info.computing.memory_size << 20
108101
)
102+
dev_index = dev_virt_info.vdev_id
109103
else:
110104
dev_chip_info = pydcmi.dcmi_get_device_chip_info_v2(
111105
dev_card_id,
@@ -119,6 +113,19 @@ def detect(self) -> Devices | None:
119113
dev_card_id,
120114
dev_device_id,
121115
)
116+
dev_index = pydcmi.dcmi_get_device_logic_id(
117+
dev_card_id,
118+
dev_device_id,
119+
)
120+
if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
121+
dev_index = pydcmi.dcmi_get_device_phyid_from_logicid(
122+
dev_index,
123+
)
124+
dev_uuid = pydcmi.dcmi_get_device_die_v2(
125+
dev_card_id,
126+
dev_device_id,
127+
pydcmi.DCMI_DIE_TYPE_VDIE,
128+
)
122129
dev_util_aicore = pydcmi.dcmi_get_device_utilization_rate(
123130
dev_card_id,
124131
dev_device_id,
@@ -133,6 +140,7 @@ def detect(self) -> Devices | None:
133140
dev_device_id,
134141
)
135142
dev_appendix = {
143+
"vgpu": dev_is_vgpu,
136144
"card_id": dev_card_id,
137145
"device_id": dev_device_id,
138146
}
@@ -152,11 +160,7 @@ def detect(self) -> Devices | None:
152160
ret.append(
153161
Device(
154162
manufacturer=self.manufacturer,
155-
indexes=(
156-
dev_index
157-
if isinstance(dev_index, list)
158-
else [dev_index]
159-
),
163+
index=dev_index,
160164
name=dev_name,
161165
uuid=dev_uuid.upper(),
162166
driver_version=sys_driver_ver,

gpustack_runtime/detector/nvidia.py

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from .. import envs
1111
from .__types__ import Detector, Device, Devices, ManufacturerEnum
12-
from .__utils__ import get_pci_devices
12+
from .__utils__ import get_device_files, get_pci_devices
1313

1414
logger = logging.getLogger(__name__)
1515

@@ -82,23 +82,25 @@ def detect(self) -> Devices | None:
8282
dev_runtime_ver = f"{dev_runtime_ver_t[0]}.{dev_runtime_ver_t[1]}"
8383

8484
dev_count = pynvml.nvmlDeviceGetCount()
85+
dev_files = None
8586
for dev_idx in range(dev_count):
8687
dev = pynvml.nvmlDeviceGetHandleByIndex(dev_idx)
8788

88-
dev_pci_info = pynvml.nvmlDeviceGetPciInfo(dev)
8989
dev_is_vgpu = False
90+
dev_pci_info = pynvml.nvmlDeviceGetPciInfo(dev)
9091
for addr in [dev_pci_info.busIdLegacy, dev_pci_info.busId]:
9192
if addr in pci_devs:
9293
dev_is_vgpu = _is_vgpu(pci_devs[addr].config)
9394
break
9495

9596
dev_index = dev_idx
9697
if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
97-
dev_index = (
98-
dev_pci_info.bus - 1
99-
if dev_pci_info.bus > 0
100-
else dev_pci_info.bus
101-
)
98+
if dev_files is None:
99+
dev_files = get_device_files(pattern=r"nvidia(?P<number>\d+)")
100+
if len(dev_files) > dev_idx:
101+
dev_file = dev_files[dev_idx]
102+
if dev_file.number is not None:
103+
dev_index = dev_file.number
102104
dev_uuid = pynvml.nvmlDeviceGetUUID(dev)
103105
dev_cores = pynvml.nvmlDeviceGetNumGpuCores(dev)
104106
dev_mem = pynvml.nvmlDeviceGetMemoryInfo(dev)
@@ -140,11 +142,7 @@ def detect(self) -> Devices | None:
140142
ret.append(
141143
Device(
142144
manufacturer=self.manufacturer,
143-
indexes=(
144-
dev_index
145-
if isinstance(dev_index, list)
146-
else [dev_index]
147-
),
145+
index=dev_index,
148146
name=dev_name,
149147
uuid=dev_uuid.upper(),
150148
driver_version=sys_driver_ver,
@@ -176,6 +174,7 @@ def detect(self) -> Devices | None:
176174
for mdev_idx in range(mdev_count):
177175
mdev = pynvml.nvmlDeviceGetMigDeviceHandleByIndex(dev, mdev_idx)
178176

177+
mdev_index = mdev_idx
179178
mdev_uuid = pynvml.nvmlDeviceGetUUID(mdev)
180179
mdev_mem = pynvml.nvmlDeviceGetMemoryInfo(mdev)
181180
mdev_temp = pynvml.nvmlDeviceGetTemperature(
@@ -192,10 +191,6 @@ def detect(self) -> Devices | None:
192191
mdev_ci_id = pynvml.nvmlDeviceGetComputeInstanceId(mdev)
193192
mdev_appendix["compute_instance_id"] = mdev_ci_id
194193

195-
mdev_index = mdev_idx
196-
if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
197-
mdev_index = [mdev_gi_id, mdev_ci_id]
198-
199194
if not mdev_name:
200195
mdev_attrs = pynvml.nvmlDeviceGetAttributes(mdev)
201196

@@ -272,11 +267,7 @@ def detect(self) -> Devices | None:
272267
ret.append(
273268
Device(
274269
manufacturer=self.manufacturer,
275-
indexes=(
276-
mdev_index
277-
if isinstance(mdev_index, list)
278-
else [mdev_index]
279-
),
270+
index=mdev_index,
280271
name=mdev_name,
281272
uuid=mdev_uuid.upper(),
282273
driver_version=sys_driver_ver,

mkdocs.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,5 @@ copyright: Copyright &copy; 2025, GPUStack Contributors
4646
nav:
4747
- Modules:
4848
- gpustack_runtime: modules/gpustack_runtime.md
49-
- gpustack_runtime.detect: modules/gpustack_runtime.detect.md
49+
- gpustack_runtime.detector: modules/gpustack_runtime.detector.md
50+
- gpustack_runtime.deployer: modules/gpustack_runtime.deployer.md

0 commit comments

Comments
 (0)