Skip to content

Commit 9a0e071

Browse files
Don't fail checking GPU memory (#525)
1 parent fe56840 commit 9a0e071

File tree

2 files changed

+20
-12
lines changed

2 files changed

+20
-12
lines changed

model-engine/model_engine_server/inference/batch_inference/vllm_batch.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -479,7 +479,8 @@ def get_gpu_free_memory(): # pragma: no cover
479479
).stdout
480480
gpu_memory = [int(x) for x in output.strip().split("\n")]
481481
return gpu_memory
482-
except subprocess.CalledProcessError:
482+
except Exception as e:
483+
print(f"Error getting GPU memory: {e}")
483484
return None
484485

485486

@@ -494,11 +495,14 @@ def check_unknown_startup_memory_usage(): # pragma: no cover
494495
print(
495496
f"WARNING: Unbalanced GPU memory usage at start up. This may cause OOM. Memory usage per GPU in MB: {gpu_free_memory}."
496497
)
497-
# nosemgrep
498-
output = subprocess.run(
499-
["fuser -v /dev/nvidia*"], shell=True, capture_output=True, text=True
500-
).stdout
501-
print(f"Processes using GPU: {output}")
498+
try:
499+
# nosemgrep
500+
output = subprocess.run(
501+
["fuser -v /dev/nvidia*"], shell=True, capture_output=True, text=True
502+
).stdout
503+
print(f"Processes using GPU: {output}")
504+
except Exception as e:
505+
print(f"Error getting processes using GPU: {e}")
502506

503507

504508
if __name__ == "__main__":

model-engine/model_engine_server/inference/vllm/vllm_server.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -140,7 +140,8 @@ def get_gpu_free_memory():
140140
).stdout
141141
gpu_memory = [int(x) for x in output.strip().split("\n")]
142142
return gpu_memory
143-
except subprocess.CalledProcessError:
143+
except Exception as e:
144+
print(f"Error getting GPU memory: {e}")
144145
return None
145146

146147

@@ -154,11 +155,14 @@ def check_unknown_startup_memory_usage():
154155
print(
155156
f"WARNING: Unbalanced GPU memory usage at start up. This may cause OOM. Memory usage per GPU in MB: {gpu_free_memory}."
156157
)
157-
# nosemgrep
158-
output = subprocess.run(
159-
["fuser -v /dev/nvidia*"], shell=True, capture_output=True, text=True
160-
).stdout
161-
print(f"Processes using GPU: {output}")
158+
try:
159+
# nosemgrep
160+
output = subprocess.run(
161+
["fuser -v /dev/nvidia*"], shell=True, capture_output=True, text=True
162+
).stdout
163+
print(f"Processes using GPU: {output}")
164+
except Exception as e:
165+
print(f"Error getting processes using GPU: {e}")
162166

163167

164168
def debug(sig, frame):

0 commit comments

Comments
 (0)