@@ -1195,22 +1195,45 @@ def fetch_gpu_properties(testCL,testCU,testVK):
11951195 FetchedCUfreeMem = []
11961196 pass
11971197 if len (FetchedCUdevices )== 0 :
1198- try : # Get AMD ROCm GPU names
1198+ try : # Get AMD ROCm GPU names and VRAM from rocminfo
11991199 output = subprocess .run (['rocminfo' ], capture_output = True , text = True , check = True , encoding = 'utf-8' , timeout = 10 ).stdout
12001200 device_name = None
1201+ current_agent_is_gpu = False
1202+ in_pool_section = False
1203+
12011204 for line in output .splitlines (): # read through the output line by line
12021205 line = line .strip ()
1203- if line .startswith ("Marketing Name:" ):
1206+ if line .startswith ("Agent " ) and "Agent" in line :
1207+ # Reset state for new agent
1208+ device_name = None
1209+ current_agent_is_gpu = False
1210+ in_pool_section = False
1211+ elif line .startswith ("Marketing Name:" ):
12041212 device_name = line .split (":" , 1 )[1 ].strip () # if we find a named device, temporarily save the name
1205- elif line .startswith ("Device Type:" ) and "GPU" in line and device_name is not None : # if the following Device Type is a GPU (not a CPU) then add it to devices list
1213+ elif line .startswith ("Device Type:" ) and "GPU" in line and device_name is not None :
1214+ # if the following Device Type is a GPU (not a CPU) then add it to devices list
12061215 FetchedCUdevices .append (device_name )
1216+ current_agent_is_gpu = True
12071217 AMDgpu = True
12081218 elif line .startswith ("Device Type:" ) and "GPU" not in line :
12091219 device_name = None
1210- if FetchedCUdevices :
1211- getamdvram = subprocess .run (['rocm-smi' , '--showmeminfo' , 'vram' , '--csv' ], capture_output = True , text = True , check = True , encoding = 'utf-8' , timeout = 10 ).stdout # fetch VRAM of devices
1212- if getamdvram :
1213- FetchedCUdeviceMem = [line .split ("," )[1 ].strip () for line in getamdvram .splitlines ()[1 :] if line .strip ()]
1220+ current_agent_is_gpu = False
1221+ elif line .startswith ("Pool Info:" ) and current_agent_is_gpu :
1222+ in_pool_section = True
1223+ elif in_pool_section and current_agent_is_gpu and line .startswith ("Segment:" ) and "GLOBAL" in line and "COARSE GRAINED" in line :
1224+ # This is the main VRAM pool for this GPU
1225+ continue
1226+ elif in_pool_section and current_agent_is_gpu and line .startswith ("Size:" ):
1227+ # Extract VRAM size in KB and convert to MB
1228+ size_match = re .search (r'(\d+)\(0x[0-9a-fA-F]+\)\s*KB' , line )
1229+ if size_match :
1230+ vram_kb = int (size_match .group (1 ))
1231+ vram_mb = vram_kb // 1024
1232+ FetchedCUdeviceMem .append (str (vram_mb ))
1233+ in_pool_section = False
1234+
1235+ if FetchedCUdevices and FetchedCUdeviceMem :
1236+ print (f"Detected AMD GPU VRAM from rocminfo: { list (zip (FetchedCUdevices , FetchedCUdeviceMem ))} MB" )
12141237 except Exception :
12151238 FetchedCUdeviceMem = []
12161239 FetchedCUfreeMem = []
@@ -1236,12 +1259,10 @@ def fetch_gpu_properties(testCL,testCU,testVK):
12361259 MaxMemory [0 ] = max (lowestcumem ,MaxMemory [0 ])
12371260 MaxFreeMemory [0 ] = max (lowestfreecumem ,MaxFreeMemory [0 ])
12381261
1239- if MaxMemory [0 ] < (1024 * 1024 * 256 ):
1240- print ("Unable to detect VRAM, please set layers manually." )
1241-
12421262 if testVK :
12431263 try : # Get Vulkan names
12441264 foundVkGPU = False
1265+ lowestvkmem = 0
12451266 output = subprocess .run (['vulkaninfo' ,'--summary' ], capture_output = True , text = True , check = True , encoding = 'utf-8' , timeout = 10 ).stdout
12461267 devicelist = [line .split ("=" )[1 ].strip () for line in output .splitlines () if "deviceName" in line ]
12471268 devicetypes = [line .split ("=" )[1 ].strip () for line in output .splitlines () if "deviceType" in line ]
@@ -1265,16 +1286,15 @@ def fetch_gpu_properties(testCL,testCU,testVK):
12651286 output = subprocess .run (['vulkaninfo' ], capture_output = True , text = True , check = True , encoding = 'utf-8' , timeout = 10 ).stdout
12661287 devicechunks = output .split ("VkPhysicalDeviceMemoryProperties" )[1 :]
12671288 gpuidx = 0
1268- lowestvkmem = 0
12691289 for chunk in devicechunks :
12701290 heaps = chunk .split ("memoryTypes:" )[0 ].split ("memoryHeaps[" )[1 :]
1271- snippet = heaps [ 0 ]
1272- if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in snippet and "size" in snippet :
1273- match = re .search (r"size\s*=\s*(\d+)" , snippet )
1274- if match :
1275- dmem = int (match .group (1 ))
1276- if dmem > gpumem_ignore_limit_min and dmem < gpumem_ignore_limit_max :
1277- lowestvkmem = dmem if lowestvkmem == 0 else (dmem if dmem < lowestvkmem else lowestvkmem )
1291+ for heap in heaps : # Check all heaps, not just the first one
1292+ if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in heap and "size" in heap :
1293+ match = re .search (r"size\s*=\s*(\d+)" , heap )
1294+ if match :
1295+ dmem = int (match .group (1 ))
1296+ if dmem > gpumem_ignore_limit_min and dmem < gpumem_ignore_limit_max :
1297+ lowestvkmem = dmem if lowestvkmem == 0 else (dmem if dmem < lowestvkmem else lowestvkmem )
12781298 gpuidx += 1
12791299 except Exception : # failed to get vulkan vram
12801300 pass
@@ -1311,6 +1331,11 @@ def fetch_gpu_properties(testCL,testCU,testVK):
13111331 MaxMemory [0 ] = max (lowestclmem ,MaxMemory [0 ])
13121332 except Exception :
13131333 pass
1334+
1335+ # Check VRAM detection after all backends have been tested
1336+ if MaxMemory [0 ] < (1024 * 1024 * 256 ):
1337+ print ("Unable to detect VRAM, please set layers manually." )
1338+
13141339 return
13151340
13161341def auto_set_backend_cli ():
0 commit comments