@@ -935,7 +935,7 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,
935935 fsize = os .path .getsize (filepath )
936936 if fsize > 10000000 : #dont bother with models < 10mb as they are probably bad
937937 ggufmeta = read_gguf_metadata (filepath )
938- modelfile_extracted_meta = [ggufmeta ,fsize ,sdfsize ,whisperfsize ,mmprojsize ,draftmodelsize ,ttsmodelsize ] #extract done. note that meta may be null
938+ modelfile_extracted_meta = [filepath , ggufmeta ,fsize ,sdfsize ,whisperfsize ,mmprojsize ,draftmodelsize ,ttsmodelsize ] #extract done. note that meta may be null
939939 except Exception :
940940 modelfile_extracted_meta = None
941941
@@ -953,28 +953,36 @@ def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how man
953953 if not modelfile_extracted_meta :
954954 return 0
955955 layerlimit = 0
956- fsize = modelfile_extracted_meta [1 ]
956+ fsize = modelfile_extracted_meta [2 ]
957+ fname = modelfile_extracted_meta [0 ]
957958 if fsize > 10000000 : #dont bother with models < 10mb
958959 cs = ctxsize
959960 mem = gpumem
960- if modelfile_extracted_meta [2 ] > 1024 * 1024 * 1024 * 5 : #sdxl tax
961+ if "-00001-of-000" in fname :
962+ match = re .search (r'-(\d{5})-of-(\d{5})\.' , fname )
963+ if match :
964+ total_parts = int (match .group (2 ))
965+ if total_parts > 1 and total_parts < 99 :
966+ print ("Multi-Part GGUF detected. Layer estimates may not be very accurate - recommend setting layers manually." )
967+ fsize *= total_parts
968+ if modelfile_extracted_meta [3 ] > 1024 * 1024 * 1024 * 5 : #sdxl tax
961969 mem -= 1024 * 1024 * 1024 * (6 if sdquanted else 9 )
962- elif modelfile_extracted_meta [2 ] > 1024 * 1024 * 512 : #normal sd tax
970+ elif modelfile_extracted_meta [3 ] > 1024 * 1024 * 512 : #normal sd tax
963971 mem -= 1024 * 1024 * 1024 * (3.25 if sdquanted else 4.25 )
964- if modelfile_extracted_meta [3 ] > 1024 * 1024 * 10 : #whisper tax
965- mem -= max (350 * 1024 * 1024 ,modelfile_extracted_meta [3 ]* 1.5 )
966- if modelfile_extracted_meta [4 ] > 1024 * 1024 * 10 : #mmproj tax
972+ if modelfile_extracted_meta [4 ] > 1024 * 1024 * 10 : #whisper tax
967973 mem -= max (350 * 1024 * 1024 ,modelfile_extracted_meta [4 ]* 1.5 )
968- if modelfile_extracted_meta [5 ] > 1024 * 1024 * 10 : #draft model tax
969- mem -= (modelfile_extracted_meta [5 ] * 1.5 )
970- if modelfile_extracted_meta [6 ] > 1024 * 1024 * 10 : #tts model tax
971- mem -= max (600 * 1024 * 1024 , modelfile_extracted_meta [6 ] * 3 )
974+ if modelfile_extracted_meta [5 ] > 1024 * 1024 * 10 : #mmproj tax
975+ mem -= max (350 * 1024 * 1024 ,modelfile_extracted_meta [5 ]* 1.5 )
976+ if modelfile_extracted_meta [6 ] > 1024 * 1024 * 10 : #draft model tax
977+ mem -= (modelfile_extracted_meta [6 ] * 1.5 )
978+ if modelfile_extracted_meta [7 ] > 1024 * 1024 * 10 : #tts model tax
979+ mem -= max (600 * 1024 * 1024 , modelfile_extracted_meta [7 ] * 3 )
972980 mem = 0 if mem < 0 else mem
973981
974982 csmul = 1.0
975983 if cs :
976984 csmul = (cs / 4096 ) if cs >= 8192 else 1.8 if cs > 4096 else 1.2 if cs > 2048 else 1.0
977- ggufmeta = modelfile_extracted_meta [0 ]
985+ ggufmeta = modelfile_extracted_meta [1 ]
978986 if not ggufmeta or ggufmeta [0 ]== 0 : #fail to read or no layers
979987 sizeperlayer = fsize * csmul * 0.052
980988 layerlimit = int (min (200 ,(mem - usedmem )/ sizeperlayer ))
@@ -4114,7 +4122,7 @@ def gui_changed_modelfile(*args):
41144122
41154123 def changed_gpulayers_estimate (* args ):
41164124 predicted_gpu_layers = autoset_gpu_layers (int (contextsize_text [context_var .get ()]),(sd_quant_var .get ()== 1 ),int (blasbatchsize_values [int (blas_size_var .get ())]))
4117- max_gpu_layers = (f"/{ modelfile_extracted_meta [0 ][0 ]+ 3 } " if (modelfile_extracted_meta and modelfile_extracted_meta [0 ] and modelfile_extracted_meta [0 ][0 ]!= 0 ) else "" )
4125+ max_gpu_layers = (f"/{ modelfile_extracted_meta [1 ][0 ]+ 3 } " if (modelfile_extracted_meta and modelfile_extracted_meta [1 ] and modelfile_extracted_meta [1 ][0 ]!= 0 ) else "" )
41184126 index = runopts_var .get ()
41194127 gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)" )
41204128 layercounter_label .grid (row = 6 , column = 1 , padx = 75 , sticky = "W" )
@@ -4125,7 +4133,7 @@ def changed_gpulayers_estimate(*args):
41254133 elif gpu_be and gpulayers_var .get ()== "-1" and predicted_gpu_layers > 0 :
41264134 quick_layercounter_label .configure (text = f"(Auto: { predicted_gpu_layers } { max_gpu_layers } Layers)" )
41274135 layercounter_label .configure (text = f"(Auto: { predicted_gpu_layers } { max_gpu_layers } Layers)" )
4128- elif gpu_be and gpulayers_var .get ()== "-1" and predicted_gpu_layers <= 0 and (modelfile_extracted_meta and modelfile_extracted_meta [1 ]):
4136+ elif gpu_be and gpulayers_var .get ()== "-1" and predicted_gpu_layers <= 0 and (modelfile_extracted_meta and modelfile_extracted_meta [2 ]):
41294137 quick_layercounter_label .configure (text = "(Auto: No Offload)" )
41304138 layercounter_label .configure (text = "(Auto: No Offload)" )
41314139 elif gpu_be and gpulayers_var .get ()== "" :