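"""Peridot VRAM Hammer Diagnostic.

Allocates ~4 GB on the GPU to simulate a heavy Folding@home-style load,
hard-purges it, and measures how quickly the VRAM is reclaimed.

Usage (requires a CUDA-enabled PyTorch build in your venv):
    python diagnostic.py
"""
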
import time
import subprocess
import torch  # Requires a CUDA-enabled PyTorch build in your venv


def get_vram_usage():
    """Return current GPU memory usage in MB, as reported by nvidia-smi."""
    cmd = ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits"]
    output = subprocess.check_output(cmd)
    # nvidia-smi prints one line per device; take the first GPU's reading.
    return int(output.decode().strip().splitlines()[0])
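

# A minimal alternative sketch (not used by the test above), assuming the
# nvidia-ml-py (pynvml) package is available: it reads the same per-device
# memory counter through NVML directly, avoiding the nvidia-smi subprocess
# round-trip. The function name and device_index default are illustrative.
def get_vram_usage_nvml(device_index=0):
    import pynvml  # Deferred import so the main diagnostic runs without pynvml

    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
        # NVML reports bytes; convert to MB to match get_vram_usage().
        return pynvml.nvmlDeviceGetMemoryInfo(handle).used // (1024 * 1024)
    finally:
        pynvml.nvmlShutdown()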


def run_stress_test():
    print("--- Peridot VRAM Hammer Diagnostic ---")

    # 1. Baseline
    baseline = get_vram_usage()
    print(f"[1/4] Baseline VRAM: {baseline} MB")

    # 2. Heavy VRAM Allocation
    print("[2/4] Allocating 4GB of VRAM to simulate heavy Folding@home load...")
    try:
        # A 1024^3 float32 tensor is exactly 4 GiB on the GPU.
        dummy_tensor = torch.zeros((1024, 1024, 1024), device="cuda")
        active_vram = get_vram_usage()
        print(f"    Active Stress VRAM: {active_vram} MB")
    except Exception as e:
        print(f"    Allocation failed: {e}")
        return

    # 3. Trigger the Purge
    print("[3/4] Triggering Prompt Simulation. Manually clearing VRAM...")
    start_time = time.perf_counter()
    # In the real app, this is where SIGTERM hits. Here, we simulate the
    # 'Hard-Kill' by deleting the object and clearing PyTorch's CUDA cache.
    del dummy_tensor
    torch.cuda.empty_cache()
    end_time = time.perf_counter()

    # 4. Measure Latency
    # Stop the clock before querying nvidia-smi, so the subprocess
    # round-trip does not inflate the measured purge time.
    purge_vram = get_vram_usage()
    latency = (end_time - start_time) * 1000
    print("[4/4] Purge Complete.")
    print(f"    Latency: {latency:.2f} ms")
    print(f"    Final VRAM: {purge_vram} MB")

    # Allow 100 MB of slack over baseline for driver/context overhead.
    if purge_vram <= (baseline + 100):
        print(
            f"\n[RESULT] PASS: Blackwell handled the {active_vram - baseline}MB dump in {latency:.2f}ms."
        )
    else:
        print("\n[RESULT] FAIL: VRAM fragmentation or residual allocation detected.")


if __name__ == "__main__":
    if torch.cuda.is_available():
        run_stress_test()
    else:
        print("CUDA not detected. Ensure torch is installed with CUDA support.")