Skip to content

Commit 9e8bdb1

Browse files
committed
fix: unexpected error report in amd
Signed-off-by: thxCode <[email protected]>
1 parent 98e3466 commit 9e8bdb1

File tree

2 files changed

+7
-4
lines changed

2 files changed

+7
-4
lines changed

gpustack_runtime/detector/amd.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,6 @@ def detect(self) -> Devices | None:
104104
dev_card_id = None
105105
if dev_hsa_agent:
106106
dev_card_id = dev_hsa_agent.driver_node_id
107-
elif hasattr(pyamdsmi, "amdsmi_get_gpu_kfd_info"):
108-
dev_kfd_info = pyamdsmi.amdsmi_get_gpu_kfd_info(dev)
109-
dev_card_id = dev_kfd_info.get("node_id")
110107
else:
111108
with contextlib.suppress(pyrocmsmi.ROCMSMIError):
112109
pyrocmsmi.rsmi_init()

gpustack_runtime/detector/pyamdgpu/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
## Error Codes ##
3535
AMDGPU_SUCCESS = 0
36+
AMDGPU_ERROR_CARD_NOTFOUND = -99996
3637
AMDGPU_ERROR_UNINITIALIZED = -99997
3738
AMDGPU_ERROR_FUNCTION_NOT_FOUND = -99998
3839
AMDGPU_ERROR_LIBRARY_NOT_FOUND = -99999
@@ -44,6 +45,7 @@
4445

4546
class AMDGPUError(Exception):
4647
_extend_errcode_to_string: ClassVar[dict[int, str]] = {
48+
AMDGPU_ERROR_CARD_NOTFOUND: "Card Not Found",
4749
AMDGPU_ERROR_UNINITIALIZED: "Library Not Initialized",
4850
AMDGPU_ERROR_FUNCTION_NOT_FOUND: "Function Not Found",
4951
AMDGPU_ERROR_LIBRARY_NOT_FOUND: "Library Not Found",
@@ -240,7 +242,11 @@ def _LoadAMDGPULibrary():
240242
def amdgpu_device_initialize(card=1):
241243
_LoadAMDGPULibrary()
242244

243-
fd = os.open(f"/dev/dri/card{card}", os.O_RDONLY)
245+
try:
246+
fd = os.open(f"/dev/dri/card{card}", os.O_RDONLY)
247+
except FileNotFoundError:
248+
raise AMDGPUError(AMDGPU_ERROR_CARD_NOTFOUND)
249+
244250
c_major = c_uint32()
245251
c_minor = c_uint32()
246252
device = c_amdgpu_device_t()

0 commit comments

Comments
 (0)