Skip to content

Commit fe59f50

Browse files
committed
KCPP benchmark, Croco revision
1 parent 58eefef commit fe59f50

File tree

1 file changed

+76
-16
lines changed

1 file changed

+76
-16
lines changed

koboldcpp.py

Lines changed: 76 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4727,8 +4727,19 @@ def onready_subprocess():
47274727
if args.model_param and (args.benchmark or args.prompt):
47284728
start_server = False
47294729
save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
4730-
benchmaxctx = maxctx
4731-
benchlen = args.promptlimit
4730+
gpu0avram = int(MaxMemory[0]/1024/1024)
4731+
gpu1avram = int(MaxMemory[1]/1024/1024)
4732+
gpu2avram = int(MaxMemory[2]/1024/1024)
4733+
gpu3avram = int(MaxMemory[3]/1024/1024)
4734+
gpu0fvram = int(MaxFreeMemory[0]/1024/1024)
4735+
gpu1fvram = int(MaxFreeMemory[1]/1024/1024)
4736+
gpu2fvram = int(MaxFreeMemory[2]/1024/1024)
4737+
gpu3fvram = int(MaxFreeMemory[3]/1024/1024)
4738+
gpuavram = gpu0avram + gpu1avram + gpu2avram + gpu3avram
4739+
gpufvram = gpu0fvram + gpu1fvram + gpu2fvram + gpu3fvram
4740+
benchmaxctx = maxctx - 128
4741+
benchtg = args.promptlimit
4742+
benchpp = (benchmaxctx - benchtg)
47324743
benchtemp = 0.1
47334744
benchtopk = 1
47344745
benchreppen = 1
@@ -4743,8 +4754,8 @@ def onready_subprocess():
47434754
if not args.benchmark:
47444755
benchbaneos = False
47454756
if args.benchmark:
4746-
if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
4747-
print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
4757+
if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 13000000:
4758+
print(f"\nWarning: The benchmark CSV output file you selected exceeds 13MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
47484759
save_to_file = False
47494760
if save_to_file:
47504761
print(f"\nRunning benchmark (Save to File: {args.benchmark})...")
@@ -4756,7 +4767,7 @@ def onready_subprocess():
47564767
benchprompt += benchprompt
47574768
genp = {
47584769
"prompt":benchprompt,
4759-
"max_length":benchlen,
4770+
"max_length":benchtg,
47604771
"max_context_length":benchmaxctx,
47614772
"temperature":benchtemp,
47624773
"top_k":benchtopk,
@@ -4769,34 +4780,83 @@ def onready_subprocess():
47694780
restore_stdout()
47704781
print(result)
47714782
if args.benchmark:
4772-
result = (result[:8] if len(result)>8 else "") if not args.prompt else result
4773-
t_pp = float(handle.get_last_process_time())*float(benchmaxctx-benchlen)*0.001
4774-
t_gen = float(handle.get_last_eval_time())*float(benchlen)*0.001
4775-
s_pp = float(benchmaxctx-benchlen)/t_pp
4776-
s_gen = float(benchlen)/t_gen
4783+
result = (result[:4] if len(result)>4 else "") if not args.prompt else result
4784+
resultok = ((result==" 1 1") or (result=="1 1 "))
4785+
t_pp = float(handle.get_last_process_time())*float(benchpp)*0.001
4786+
t_gen = float(handle.get_last_eval_time())*float(benchtg)*0.001
4787+
s_pp = float(benchpp)/t_pp
4788+
s_gen = float(benchtg)/t_gen
47774789
datetimestamp = datetime.now(timezone.utc)
4778-
benchflagstr = f"NoAVX2={args.noavx2} Threads={args.threads} HighPriority={args.highpriority} Cublas_Args={args.usecublas} Tensor_Split={args.tensor_split} BlasThreads={args.blasthreads} BlasBatchSize={args.blasbatchsize} FlashAttention={args.flashattention} KvCache={args.quantkv}"
4790+
4791+
print(f"\nBench Completed - v{KcppVersion} ; LlamaCPP {LcppVersion}\nIf Cuda mode: {CudaSpecifics} ; Release date: {ReleaseDate}; Results:")
4792+
4793+
benchflagstr = f"NoAVX2={args.noavx2} Threads={args.threads} HighPriority={args.highpriority} NoBlas={args.noblas} Cublas_Args={args.usecublas} Offloaded layers={args.gpulayers} Tensor_Split={args.tensor_split} BlasThreads={args.blasthreads} BlasBatchSize={args.blasbatchsize} FlashAttention={args.flashattention} KvCache={args.quantkv}"
47794794
print(f"\nBenchmark Completed - v{KcppVersion} Results:\n======")
47804795
print(f"Flags: {benchflagstr}")
47814796
print(f"Timestamp: {datetimestamp}")
47824797
print(f"Backend: {libname}")
47834798
print(f"Layers: {args.gpulayers}")
47844799
print(f"Model: {benchmodel}")
4785-
print(f"MaxCtx: {benchmaxctx}")
4786-
print(f"GenAmount: {benchlen}\n-----")
4800+
print(f"NoAVX2: {args.noavx2}")
4801+
print(f"NoBlas: {args.noblas}")
4802+
print(f"NoMmap: {args.nommap}")
4803+
print(f"HighPriority: {args.highpriority}")
4804+
print(f"FlashAttention: {args.flashattention}")
4805+
print(f"Threads: {args.threads}")
4806+
CUDevicesNames.sort(reverse=True)
4807+
if gpu0avram>0:
4808+
print(f"GPU 0 Name: {CUDevicesNames[0]}")
4809+
if gpu0avram>0:
4810+
print(f"GPU 0 VRAM: {gpu0avram} MiB")
4811+
if gpu0fvram>0:
4812+
print(f"GPU 0 VRAM: {gpu0fvram} MiB")
4813+
if gpu1avram>0:
4814+
print(f"GPU 1 Name: {CUDevicesNames[1]}")
4815+
if gpu1avram>0:
4816+
print(f"GPU 1 VRAM: {gpu1avram} MiB")
4817+
if gpu1fvram>0:
4818+
print(f"GPU 1 VRAM: {gpu1fvram} MiB")
4819+
if gpu2avram>0:
4820+
print(f"GPU 2 Name: {CUDevicesNames[2]}")
4821+
if gpu2avram>0:
4822+
print(f"GPU 2 VRAM: {gpu2avram} MiB")
4823+
if gpu2fvram>0:
4824+
print(f"GPU 2 VRAM: {gpu2fvram} MiB")
4825+
if gpu3avram>0:
4826+
print(f"GPU 3 Name: {CUDevicesNames[3]}")
4827+
if gpu3avram>0:
4828+
print(f"GPU 3 VRAM: {gpu3avram} MiB")
4829+
if gpu3fvram>0:
4830+
print(f"GPU 3 VRAM: {gpu3fvram} MiB")
4831+
if gpuavram > gpu0avram:
4832+
print(f"GPUs Total VRAM: {gpuavram} MiB")
4833+
if gpufvram > gpu0fvram:
4834+
print(f"GPUs Total VRAM: {gpufvram} MiB")
4835+
print(f"Cublas_Args: {args.usecublas}")
4836+
print(f"Layers: {args.gpulayers}")
4837+
print(f"Tensor_Split: {args.tensor_split}")
4838+
print(f"BlasThreads: {args.blasthreads}")
4839+
print(f"Blas_nBatchSize: {args.blasbatchsize}")
4840+
print(f"Blas_uBatchSize: {args.blasubatchsize}")
4841+
print(f"KV_cache: {args.quantkv}")
4842+
print(f"MaxCtx: {maxctx}\n-----")
4843+
print(f"PPnum: {benchpp}")
47874844
print(f"ProcessingTime: {t_pp:.3f}s")
47884845
print(f"ProcessingSpeed: {s_pp:.2f}T/s")
4846+
print(f"TGnum: {benchtg}")
47894847
print(f"GenerationTime: {t_gen:.3f}s")
47904848
print(f"GenerationSpeed: {s_gen:.2f}T/s")
4849+
print(f"BenchmarkCtx: {benchmaxctx}")
47914850
print(f"TotalTime: {(t_pp+t_gen):.3f}s")
4792-
print(f"Output: {result}\n-----")
4851+
print(f"Output: {result}")
4852+
print(f"Coherent: {resultok}")
47934853
if save_to_file:
47944854
try:
47954855
with open(args.benchmark, "a") as file:
47964856
file.seek(0, 2)
47974857
if file.tell() == 0: #empty file
4798-
file.write(f"Timestamp,Backend,Layers,Model,MaxCtx,GenAmount,ProcessingTime,ProcessingSpeed,GenerationTime,GenerationSpeed,TotalTime,Output,Flags")
4799-
file.write(f"\n{datetimestamp},{libname},{args.gpulayers},{benchmodel},{benchmaxctx},{benchlen},{t_pp:.2f},{s_pp:.2f},{t_gen:.2f},{s_gen:.2f},{(t_pp+t_gen):.2f},{result},{benchflagstr}")
4858+
file.write(f"Datime,KCPPF,LCPP,Backend,CudaSpecifics,Model,NoAvx2,NoBlas,NoMmap,HighP,FlashA,Thrd,VRAM,FVRAM0,Layers,BlasThrd,BBSizeN,BBSizeU,KVC,PPNum,PPTime,PPSpeed,TGNum,TGTime,TGSpeed,BenchCtx,TotalTime,Coher,Tensor1,Split2,Cublas1,Argument2,Argument3,Argument4")
4859+
file.write(f"\n{ReleaseDate},{KcppVersion},{LcppVersion},{libname},{CudaSpecifics},{benchmodel},{args.noavx2},{args.noblas},{args.nommap},{args.highpriority},{args.flashattention},{args.threads},{gpuavram},{gpu0fvram},{args.gpulayers},{args.blasthreads},{args.blasbatchsize},{args.blasubatchsize},{args.quantkv},{benchpp},{t_pp:.3f},{s_pp:.2f},{benchtg},{t_gen:.3f},{s_gen:.2f},{benchmaxctx},{(t_pp+t_gen):.3f},{resultok},{args.tensor_split},,{args.usecublas},,,")
48004860
except Exception as e:
48014861
print(f"Error writing benchmark to file: {e}")
48024862
global using_gui_launcher

0 commit comments

Comments
 (0)