Commit cb9bd2f

fix automatic VRAM detection for ROCm and Vulkan backends (LostRuins#1715)
* use rocminfo for ROCm VRAM detection
* Vulkan VRAM detection needs to consider all heaps; don't print that we're unable to detect VRAM until all detection has run
1 parent 7b396bd commit cb9bd2f

File tree

1 file changed (+43, -18 lines)

koboldcpp.py

Lines changed: 43 additions & 18 deletions
@@ -1195,22 +1195,45 @@ def fetch_gpu_properties(testCL,testCU,testVK):
             FetchedCUfreeMem = []
             pass
         if len(FetchedCUdevices)==0:
-            try: # Get AMD ROCm GPU names
+            try: # Get AMD ROCm GPU names and VRAM from rocminfo
                 output = subprocess.run(['rocminfo'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
                 device_name = None
+                current_agent_is_gpu = False
+                in_pool_section = False
+
                 for line in output.splitlines(): # read through the output line by line
                     line = line.strip()
-                    if line.startswith("Marketing Name:"):
+                    if line.startswith("Agent ") and "Agent" in line:
+                        # Reset state for new agent
+                        device_name = None
+                        current_agent_is_gpu = False
+                        in_pool_section = False
+                    elif line.startswith("Marketing Name:"):
                         device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name
-                    elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: # if the following Device Type is a GPU (not a CPU) then add it to devices list
+                    elif line.startswith("Device Type:") and "GPU" in line and device_name is not None:
+                        # if the following Device Type is a GPU (not a CPU) then add it to devices list
                         FetchedCUdevices.append(device_name)
+                        current_agent_is_gpu = True
                         AMDgpu = True
                     elif line.startswith("Device Type:") and "GPU" not in line:
                         device_name = None
-                if FetchedCUdevices:
-                    getamdvram = subprocess.run(['rocm-smi', '--showmeminfo', 'vram', '--csv'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout # fetch VRAM of devices
-                    if getamdvram:
-                        FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
+                        current_agent_is_gpu = False
+                    elif line.startswith("Pool Info:") and current_agent_is_gpu:
+                        in_pool_section = True
+                    elif in_pool_section and current_agent_is_gpu and line.startswith("Segment:") and "GLOBAL" in line and "COARSE GRAINED" in line:
+                        # This is the main VRAM pool for this GPU
+                        continue
+                    elif in_pool_section and current_agent_is_gpu and line.startswith("Size:"):
+                        # Extract VRAM size in KB and convert to MB
+                        size_match = re.search(r'(\d+)\(0x[0-9a-fA-F]+\)\s*KB', line)
+                        if size_match:
+                            vram_kb = int(size_match.group(1))
+                            vram_mb = vram_kb // 1024
+                            FetchedCUdeviceMem.append(str(vram_mb))
+                        in_pool_section = False
+
+                if FetchedCUdevices and FetchedCUdeviceMem:
+                    print(f"Detected AMD GPU VRAM from rocminfo: {list(zip(FetchedCUdevices, FetchedCUdeviceMem))} MB")
             except Exception:
                 FetchedCUdeviceMem = []
                 FetchedCUfreeMem = []
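For readers who want to experiment with the parsing approach in this hunk outside of koboldcpp, here is a minimal standalone sketch. The rocminfo-style sample text and the parse_rocminfo_vram helper are illustrative assumptions made up for this example, not part of the commit; the real code reads the output of subprocess.run(['rocminfo'], ...) as shown in the diff above.

import re

# Illustrative rocminfo-style text (made up for this sketch); in koboldcpp the
# real text comes from subprocess.run(['rocminfo'], ...).stdout as shown above.
SAMPLE = """
Agent 2
  Marketing Name:          AMD Radeon RX 7900 XTX
  Device Type:             GPU
  Pool Info:
    Pool 1
      Segment:             GLOBAL; FLAGS: COARSE GRAINED
      Size:                25149440(0x17FC000) KB
"""

def parse_rocminfo_vram(text):
    """Return a list of (gpu_name, vram_mb) tuples parsed from rocminfo-style text."""
    gpus = []
    name, is_gpu, in_pool = None, False, False
    for raw in text.splitlines():
        line = raw.strip()
        if line.startswith("Agent "):            # new agent: reset per-agent state
            name, is_gpu, in_pool = None, False, False
        elif line.startswith("Marketing Name:"):
            name = line.split(":", 1)[1].strip()
        elif line.startswith("Device Type:"):
            is_gpu = ("GPU" in line) and (name is not None)
        elif line.startswith("Pool Info:") and is_gpu:
            in_pool = True
        elif in_pool and is_gpu and line.startswith("Size:"):
            m = re.search(r'(\d+)\(0x[0-9a-fA-F]+\)\s*KB', line)  # e.g. "25149440(0x17FC000) KB"
            if m:
                gpus.append((name, int(m.group(1)) // 1024))      # KB -> MB
            in_pool = False                                       # only take the first pool size per agent
    return gpus

print(parse_rocminfo_vram(SAMPLE))  # [('AMD Radeon RX 7900 XTX', 24560)]

The sketch simplifies the state machine (it records a GPU only once a pool size is seen), but the KB-to-MB conversion and the hex-annotated "Size:" regex mirror the lines added in the hunk.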
@@ -1236,12 +1259,10 @@ def fetch_gpu_properties(testCL,testCU,testVK):
             MaxMemory[0] = max(lowestcumem,MaxMemory[0])
             MaxFreeMemory[0] = max(lowestfreecumem,MaxFreeMemory[0])

-    if MaxMemory[0] < (1024*1024*256):
-        print("Unable to detect VRAM, please set layers manually.")
-
     if testVK:
         try: # Get Vulkan names
             foundVkGPU = False
+            lowestvkmem = 0
             output = subprocess.run(['vulkaninfo','--summary'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
             devicelist = [line.split("=")[1].strip() for line in output.splitlines() if "deviceName" in line]
             devicetypes = [line.split("=")[1].strip() for line in output.splitlines() if "deviceType" in line]
@@ -1265,16 +1286,15 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                 output = subprocess.run(['vulkaninfo'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
                 devicechunks = output.split("VkPhysicalDeviceMemoryProperties")[1:]
                 gpuidx = 0
-                lowestvkmem = 0
                 for chunk in devicechunks:
                     heaps = chunk.split("memoryTypes:")[0].split("memoryHeaps[")[1:]
-                    snippet = heaps[0]
-                    if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in snippet and "size" in snippet:
-                        match = re.search(r"size\s*=\s*(\d+)", snippet)
-                        if match:
-                            dmem = int(match.group(1))
-                            if dmem > gpumem_ignore_limit_min and dmem < gpumem_ignore_limit_max:
-                                lowestvkmem = dmem if lowestvkmem==0 else (dmem if dmem<lowestvkmem else lowestvkmem)
+                    for heap in heaps: # Check all heaps, not just the first one
+                        if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in heap and "size" in heap:
+                            match = re.search(r"size\s*=\s*(\d+)", heap)
+                            if match:
+                                dmem = int(match.group(1))
+                                if dmem > gpumem_ignore_limit_min and dmem < gpumem_ignore_limit_max:
+                                    lowestvkmem = dmem if lowestvkmem==0 else (dmem if dmem<lowestvkmem else lowestvkmem)
                     gpuidx += 1
             except Exception: # failed to get vulkan vram
                 pass
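The Vulkan half of the fix boils down to scanning every memoryHeaps[...] entry of a device chunk instead of only heaps[0]. Below is a minimal standalone sketch of that scan; the vulkaninfo-style sample text and the device_local_heap_sizes helper are assumptions invented for illustration, not part of the commit.

import re

# Illustrative vulkaninfo-style device chunk (made up); in koboldcpp the real
# text comes from subprocess.run(['vulkaninfo'], ...).stdout as shown above.
CHUNK = """
memoryHeaps[0]:
    size   = 25769803776 (0x600000000) (24.00 GiB)
    flags:
        None
memoryHeaps[1]:
    size   = 8573157376 (0x1ff000000) (7.98 GiB)
    flags: count = 1
        MEMORY_HEAP_DEVICE_LOCAL_BIT
memoryTypes: ...
"""

def device_local_heap_sizes(chunk_text):
    """Return the byte sizes of every device-local heap in one device chunk."""
    sizes = []
    heaps = chunk_text.split("memoryTypes:")[0].split("memoryHeaps[")[1:]
    for heap in heaps:  # every heap, not just heaps[0]
        if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in heap and "size" in heap:
            m = re.search(r"size\s*=\s*(\d+)", heap)
            if m:
                sizes.append(int(m.group(1)))
    return sizes

# The old heaps[0]-only logic would find no device-local heap in this sample,
# because the first heap lacks MEMORY_HEAP_DEVICE_LOCAL_BIT; scanning all heaps
# finds the 7.98 GiB device-local heap on the second entry.
print(device_local_heap_sizes(CHUNK))  # [8573157376]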
@@ -1311,6 +1331,11 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                 MaxMemory[0] = max(lowestclmem,MaxMemory[0])
         except Exception:
             pass
+
+    # Check VRAM detection after all backends have been tested
+    if MaxMemory[0] < (1024*1024*256):
+        print("Unable to detect VRAM, please set layers manually.")
+
     return

 def auto_set_backend_cli():
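The third part of the fix is simply moving the "unable to detect VRAM" warning so it only runs after every backend has been probed. A rough sketch of that pattern, with names (detect_total_vram, MIN_DETECTABLE_VRAM, the probe callables) invented for illustration rather than taken from koboldcpp:

MIN_DETECTABLE_VRAM = 1024 * 1024 * 256  # 256 MiB in bytes, same threshold as the diff

def detect_total_vram(probes):
    """Run every backend probe and only warn once all of them have been tried."""
    max_vram = 0
    for probe in probes:          # e.g. CUDA, ROCm, Vulkan, OpenCL probe functions
        try:
            max_vram = max(max_vram, probe())
        except Exception:
            pass                  # one backend failing should not abort detection
    if max_vram < MIN_DETECTABLE_VRAM:
        print("Unable to detect VRAM, please set layers manually.")
    return max_vram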
