Skip to content

Commit cd0210b

Browse files
committed
refactor: option to log exception
Signed-off-by: thxCode <thxcode0824@gmail.com>
1 parent 29ed3af commit cd0210b

File tree

12 files changed

+126
-37
lines changed

12 files changed

+126
-37
lines changed

gpustack_runtime/deployer/docker.py

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -317,7 +317,10 @@ def is_supported() -> bool:
317317
try:
318318
supported = client.ping()
319319
except docker.errors.APIError:
320-
if logger.isEnabledFor(logging.DEBUG):
320+
if (
321+
logger.isEnabledFor(logging.DEBUG)
322+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
323+
):
321324
logger.exception("Failed to connect to Docker API server")
322325

323326
return supported
@@ -344,7 +347,10 @@ def _get_client() -> docker.DockerClient | None:
344347
else:
345348
client = docker.from_env()
346349
except docker.errors.DockerException:
347-
if logger.isEnabledFor(logging.DEBUG):
350+
if (
351+
logger.isEnabledFor(logging.DEBUG)
352+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
353+
):
348354
logger.exception("Failed to get Docker client")
349355

350356
return client
@@ -1175,14 +1181,16 @@ def _prepare_create(self):
11751181
self_container.id,
11761182
)
11771183
self_image = self_container.image
1178-
except docker.errors.APIError as e:
1179-
output_log = logger.warning
1180-
if logger.isEnabledFor(logging.DEBUG):
1181-
output_log = logger.exception
1182-
output_log(
1183-
"Mirrored deployment enabled, but failed to get self Container %s, skipping: %s",
1184+
except docker.errors.APIError:
1185+
output_logger = logger.warning
1186+
if (
1187+
logger.isEnabledFor(logging.DEBUG)
1188+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
1189+
):
1190+
output_logger = logger.exception
1191+
output_logger(
1192+
"Mirrored deployment enabled, but failed to get self Container %s, skipping",
11841193
self_container_id,
1185-
e,
11861194
)
11871195
return
11881196

gpustack_runtime/deployer/kuberentes.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -315,7 +315,10 @@ def is_supported() -> bool:
315315
urllib3.exceptions.MaxRetryError,
316316
kubernetes.client.exceptions.ApiException,
317317
):
318-
if logger.isEnabledFor(logging.DEBUG):
318+
if (
319+
logger.isEnabledFor(logging.DEBUG)
320+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
321+
):
319322
logger.exception("Failed to connect to Kubernetes API server")
320323

321324
return supported
@@ -341,7 +344,10 @@ def _get_client() -> kubernetes.client.ApiClient | None:
341344
client = kubernetes.client.ApiClient()
342345
client.user_agent = "gpustack/runtime"
343346
except kubernetes.config.config_exception.ConfigException:
344-
if logger.isEnabledFor(logging.DEBUG):
347+
if (
348+
logger.isEnabledFor(logging.DEBUG)
349+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
350+
):
345351
logger.exception("Failed to get Kubernetes client")
346352

347353
return client
@@ -1101,10 +1107,15 @@ def _prepare_create(self):
11011107
)
11021108
except kubernetes.client.exceptions.ApiException:
11031109
output_log = logger.warning
1104-
if logger.isEnabledFor(logging.DEBUG):
1110+
if (
1111+
logger.isEnabledFor(logging.DEBUG)
1112+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
1113+
):
11051114
output_log = logger.exception
11061115
output_log(
1107-
f"Mirrored deployment enabled, but failed to get self Pod {self_pod_namespace}/{self_pod_name}, skipping",
1116+
"Mirrored deployment enabled, but failed to get self Pod %s/%s, skipping",
1117+
self_pod_namespace,
1118+
self_pod_name,
11081119
)
11091120
return
11101121
## - Get the first Container, or the Container named "default" if exists.

gpustack_runtime/detector/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,10 @@ def detect_devices(fast: bool = True) -> Devices:
9696
detect_target = envs.GPUSTACK_RUNTIME_DETECT.lower()
9797
if detect_target == det.name:
9898
raise
99-
if logger.isEnabledFor(logging.DEBUG):
99+
if (
100+
logger.isEnabledFor(logging.DEBUG)
101+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
102+
):
100103
logger.exception("Failed to detect devices for %s", det.name)
101104

102105
return devices

gpustack_runtime/detector/amd.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,10 @@ def is_supported() -> bool:
4747
pyamdsmi.amdsmi_shut_down()
4848
supported = True
4949
except pyamdsmi.AmdSmiException:
50-
if logger.isEnabledFor(logging.DEBUG):
50+
if (
51+
logger.isEnabledFor(logging.DEBUG)
52+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
53+
):
5154
logger.exception("Failed to initialize AMD SMI")
5255

5356
return supported
@@ -187,11 +190,17 @@ def detect(self) -> Devices | None:
187190
),
188191
)
189192
except pyamdsmi.AmdSmiException:
190-
if logger.isEnabledFor(logging.DEBUG):
193+
if (
194+
logger.isEnabledFor(logging.DEBUG)
195+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
196+
):
191197
logger.exception("Failed to fetch devices")
192198
raise
193199
except Exception:
194-
if logger.isEnabledFor(logging.DEBUG):
200+
if (
201+
logger.isEnabledFor(logging.DEBUG)
202+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
203+
):
195204
logger.exception("Failed to process devices fetching")
196205
raise
197206
finally:

gpustack_runtime/detector/ascend.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,10 @@ def is_supported() -> bool:
4242
pydcmi.dcmi_init()
4343
supported = True
4444
except pydcmi.DCMIError:
45-
if logger.isEnabledFor(logging.DEBUG):
45+
if (
46+
logger.isEnabledFor(logging.DEBUG)
47+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
48+
):
4649
logger.exception("Failed to initialize DCMI")
4750

4851
return supported
@@ -188,11 +191,17 @@ def detect(self) -> Devices | None:
188191
),
189192
)
190193
except pydcmi.DCMIError:
191-
if logger.isEnabledFor(logging.DEBUG):
194+
if (
195+
logger.isEnabledFor(logging.DEBUG)
196+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
197+
):
192198
logger.exception("Failed to fetch devices")
193199
raise
194200
except Exception:
195-
if logger.isEnabledFor(logging.DEBUG):
201+
if (
202+
logger.isEnabledFor(logging.DEBUG)
203+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
204+
):
196205
logger.exception("Failed to process devices fetching")
197206
raise
198207

@@ -262,7 +271,7 @@ def _get_device_roce_network_info(
262271
pydcmi.DCMI_PORT_TYPE_ROCE_PORT,
263272
)
264273
except pydcmi.DCMIError:
265-
if slogger.isEnabledFor(logging.DEBUG):
274+
if slogger.isEnabledFor(logging.DEBUG) and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION:
266275
slogger.exception("Failed to get device roce network info")
267276

268277
return ip, mask, gateway
@@ -289,7 +298,7 @@ def _get_device_virtual_info(
289298
c_vdev_query_stru,
290299
)
291300
except pydcmi.DCMIError:
292-
if slogger.isEnabledFor(logging.DEBUG):
301+
if slogger.isEnabledFor(logging.DEBUG) and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION:
293302
slogger.exception("Failed to get device virtual info")
294303
else:
295304
return c_vdev_query_stru

gpustack_runtime/detector/cambricon.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,10 @@ def detect(self) -> Devices | None:
123123
)
124124

125125
except Exception:
126-
if logger.isEnabledFor(logging.DEBUG):
126+
if (
127+
logger.isEnabledFor(logging.DEBUG)
128+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
129+
):
127130
logger.exception("Failed to process devices fetching")
128131
raise
129132

gpustack_runtime/detector/hygon.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,10 @@ def is_supported() -> bool:
4747
pyrocmsmi.rsmi_init()
4848
supported = True
4949
except pyrocmsmi.ROCMSMIError:
50-
if logger.isEnabledFor(logging.DEBUG):
50+
if (
51+
logger.isEnabledFor(logging.DEBUG)
52+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
53+
):
5154
logger.exception("Failed to initialize ROCM SMI")
5255

5356
return supported
@@ -154,11 +157,17 @@ def detect(self) -> Devices | None:
154157
),
155158
)
156159
except pyrocmsmi.ROCMSMIError:
157-
if logger.isEnabledFor(logging.DEBUG):
160+
if (
161+
logger.isEnabledFor(logging.DEBUG)
162+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
163+
):
158164
logger.exception("Failed to fetch devices")
159165
raise
160166
except Exception:
161-
if logger.isEnabledFor(logging.DEBUG):
167+
if (
168+
logger.isEnabledFor(logging.DEBUG)
169+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
170+
):
162171
logger.exception("Failed to process devices fetching")
163172
raise
164173

gpustack_runtime/detector/iluvatar.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,10 @@ def detect(self) -> Devices | None:
165165
)
166166

167167
except Exception:
168-
if logger.isEnabledFor(logging.DEBUG):
168+
if (
169+
logger.isEnabledFor(logging.DEBUG)
170+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
171+
):
169172
logger.exception("Failed to process devices fetching")
170173
raise
171174

gpustack_runtime/detector/metax.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,10 @@ def is_supported() -> bool:
4646
pymxsml.mxSmlInit()
4747
supported = True
4848
except pymxsml.MXSMLError:
49-
if logger.isEnabledFor(logging.DEBUG):
49+
if (
50+
logger.isEnabledFor(logging.DEBUG)
51+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
52+
):
5053
logger.exception("Failed to initialize MXSML")
5154

5255
return supported
@@ -159,11 +162,17 @@ def detect(self) -> Devices | None:
159162
)
160163

161164
except pymxsml.MXSMLError:
162-
if logger.isEnabledFor(logging.DEBUG):
165+
if (
166+
logger.isEnabledFor(logging.DEBUG)
167+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
168+
):
163169
logger.exception("Failed to fetch devices")
164170
raise
165171
except Exception:
166-
if logger.isEnabledFor(logging.DEBUG):
172+
if (
173+
logger.isEnabledFor(logging.DEBUG)
174+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
175+
):
167176
logger.exception("Failed to process devices fetching")
168177
raise
169178

gpustack_runtime/detector/mthreads.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,10 @@ def is_supported() -> bool:
4141
pymtml.mtmlLibraryShutDown()
4242
supported = True
4343
except pymtml.MTMLError:
44-
if logger.isEnabledFor(logging.DEBUG):
44+
if (
45+
logger.isEnabledFor(logging.DEBUG)
46+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
47+
):
4548
logger.exception("Failed to initialize MTML")
4649

4750
return supported
@@ -152,11 +155,17 @@ def detect(self) -> Devices | None:
152155
)
153156

154157
except pymtml.MTMLError:
155-
if logger.isEnabledFor(logging.DEBUG):
158+
if (
159+
logger.isEnabledFor(logging.DEBUG)
160+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
161+
):
156162
logger.exception("Failed to fetch devices")
157163
raise
158164
except Exception:
159-
if logger.isEnabledFor(logging.DEBUG):
165+
if (
166+
logger.isEnabledFor(logging.DEBUG)
167+
and envs.GPUSTACK_RUNTIME_LOG_EXCEPTION
168+
):
160169
logger.exception("Failed to process devices fetching")
161170
raise
162171
finally:

0 commit comments

Comments
 (0)