
Commit 6494dce

handle estimation for multipart gguf (+1 squashed commits)
Squashed commits: [c7b4af9] handle estimation for multipart gguf
1 parent 9cd6a1a commit 6494dce

File tree

1 file changed (+22, -14 lines)

koboldcpp.py

Lines changed: 22 additions & 14 deletions
@@ -935,7 +935,7 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,
             fsize = os.path.getsize(filepath)
             if fsize>10000000: #dont bother with models < 10mb as they are probably bad
                 ggufmeta = read_gguf_metadata(filepath)
-                modelfile_extracted_meta = [ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize,ttsmodelsize] #extract done. note that meta may be null
+                modelfile_extracted_meta = [filepath,ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize,ttsmodelsize] #extract done. note that meta may be null
         except Exception:
             modelfile_extracted_meta = None

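The core of this commit is the list change above: filepath is now prepended to modelfile_extracted_meta, so every existing field shifts up by one index. A minimal sketch of the layout shift, with field names mirroring the variables in the diff:

    # Layout of modelfile_extracted_meta before and after this commit
    OLD_LAYOUT = ["ggufmeta", "fsize", "sdfsize", "whisperfsize",
                  "mmprojsize", "draftmodelsize", "ttsmodelsize"]
    NEW_LAYOUT = ["filepath"] + OLD_LAYOUT  # everything moves up by one

    # e.g. fsize moves from index 1 to index 2:
    assert NEW_LAYOUT.index("fsize") == OLD_LAYOUT.index("fsize") + 1

This is why every modelfile_extracted_meta[n] access in autoset_gpu_layers and the GUI callbacks below is bumped by one.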

@@ -953,28 +953,36 @@ def autoset_gpu_layers(ctxsize,sdquanted,bbs): #shitty algo to determine how many layers to use
         if not modelfile_extracted_meta:
             return 0
         layerlimit = 0
-        fsize = modelfile_extracted_meta[1]
+        fsize = modelfile_extracted_meta[2]
+        fname = modelfile_extracted_meta[0]
         if fsize>10000000: #dont bother with models < 10mb
             cs = ctxsize
             mem = gpumem
-            if modelfile_extracted_meta[2] > 1024*1024*1024*5: #sdxl tax
+            if "-00001-of-000" in fname:
+                match = re.search(r'-(\d{5})-of-(\d{5})\.', fname)
+                if match:
+                    total_parts = int(match.group(2))
+                    if total_parts > 1 and total_parts < 99:
+                        print("Multi-Part GGUF detected. Layer estimates may not be very accurate - recommend setting layers manually.")
+                        fsize *= total_parts
+            if modelfile_extracted_meta[3] > 1024*1024*1024*5: #sdxl tax
                 mem -= 1024*1024*1024*(6 if sdquanted else 9)
-            elif modelfile_extracted_meta[2] > 1024*1024*512: #normal sd tax
+            elif modelfile_extracted_meta[3] > 1024*1024*512: #normal sd tax
                 mem -= 1024*1024*1024*(3.25 if sdquanted else 4.25)
-            if modelfile_extracted_meta[3] > 1024*1024*10: #whisper tax
-                mem -= max(350*1024*1024,modelfile_extracted_meta[3]*1.5)
-            if modelfile_extracted_meta[4] > 1024*1024*10: #mmproj tax
+            if modelfile_extracted_meta[4] > 1024*1024*10: #whisper tax
                 mem -= max(350*1024*1024,modelfile_extracted_meta[4]*1.5)
-            if modelfile_extracted_meta[5] > 1024*1024*10: #draft model tax
-                mem -= (modelfile_extracted_meta[5] * 1.5)
-            if modelfile_extracted_meta[6] > 1024*1024*10: #tts model tax
-                mem -= max(600*1024*1024, modelfile_extracted_meta[6] * 3)
+            if modelfile_extracted_meta[5] > 1024*1024*10: #mmproj tax
+                mem -= max(350*1024*1024,modelfile_extracted_meta[5]*1.5)
+            if modelfile_extracted_meta[6] > 1024*1024*10: #draft model tax
+                mem -= (modelfile_extracted_meta[6] * 1.5)
+            if modelfile_extracted_meta[7] > 1024*1024*10: #tts model tax
+                mem -= max(600*1024*1024, modelfile_extracted_meta[7] * 3)
             mem = 0 if mem < 0 else mem

             csmul = 1.0
             if cs:
                 csmul = (cs/4096) if cs >= 8192 else 1.8 if cs > 4096 else 1.2 if cs > 2048 else 1.0
-            ggufmeta = modelfile_extracted_meta[0]
+            ggufmeta = modelfile_extracted_meta[1]
             if not ggufmeta or ggufmeta[0]==0: #fail to read or no layers
                 sizeperlayer = fsize*csmul*0.052
                 layerlimit = int(min(200,(mem-usedmem)/sizeperlayer))

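For split models, the file picker only selects the first shard, so its on-disk size understates the whole model. The new branch above recognizes llama.cpp-style shard names and multiplies fsize by the total shard count before estimating layers. A standalone sketch of the same detection logic (the helper name is illustrative, not part of the commit):

    import re

    def multipart_total(fname):
        # Mirrors the check added in this commit: shard names look like
        # 'model-00001-of-00003.gguf'; group(2) is the total shard count.
        if "-00001-of-000" in fname:
            match = re.search(r'-(\d{5})-of-(\d{5})\.', fname)
            if match:
                total_parts = int(match.group(2))
                if 1 < total_parts < 99:
                    return total_parts
        return 1

    print(multipart_total("Llama-3-70B-Q4_K_M-00001-of-00003.gguf"))  # 3
    print(multipart_total("single-file-model.gguf"))                  # 1

Scaling by the part count assumes all shards are roughly equal in size, which is why the printed warning recommends setting layers manually.
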
@@ -4114,7 +4122,7 @@ def gui_changed_modelfile(*args):

     def changed_gpulayers_estimate(*args):
         predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
-        max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
+        max_gpu_layers = (f"/{modelfile_extracted_meta[1][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[1] and modelfile_extracted_meta[1][0]!=0) else "")
         index = runopts_var.get()
         gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
         layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
@@ -4125,7 +4133,7 @@ def changed_gpulayers_estimate(*args):
         elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers>0:
             quick_layercounter_label.configure(text=f"(Auto: {predicted_gpu_layers}{max_gpu_layers} Layers)")
             layercounter_label.configure(text=f"(Auto: {predicted_gpu_layers}{max_gpu_layers} Layers)")
-        elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers<=0 and (modelfile_extracted_meta and modelfile_extracted_meta[1]):
+        elif gpu_be and gpulayers_var.get()=="-1" and predicted_gpu_layers<=0 and (modelfile_extracted_meta and modelfile_extracted_meta[2]):
             quick_layercounter_label.configure(text="(Auto: No Offload)")
             layercounter_label.configure(text="(Auto: No Offload)")
         elif gpu_be and gpulayers_var.get()=="":

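Downstream, the scaled fsize feeds the fallback per-layer estimate from the second hunk (sizeperlayer = fsize*csmul*0.052), so the auto layer count shrinks roughly in proportion to the shard count. Illustrative numbers only, assuming a 5 GiB first shard of a 3-part model, 4096 context, and 12 GiB of free VRAM:

    # Hypothetical walkthrough of the fallback estimate with made-up sizes
    fsize = 5 * 1024**3        # on-disk size of shard 00001 only
    total_parts = 3            # parsed from '-00001-of-00003.'
    fsize *= total_parts       # the commit's correction: ~15 GiB total
    csmul = 1.0                # context multiplier for 4096 ctx
    mem, usedmem = 12 * 1024**3, 0

    sizeperlayer = fsize * csmul * 0.052
    layerlimit = int(min(200, (mem - usedmem) / sizeperlayer))
    print(layerlimit)          # 15 layers; ~46 without the multiplier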

0 commit comments
