@@ -4727,8 +4727,19 @@ def onready_subprocess():
47274727 if args .model_param and (args .benchmark or args .prompt ):
47284728 start_server = False
47294729 save_to_file = (args .benchmark and args .benchmark != "stdout" and args .benchmark != "" )
4730- benchmaxctx = maxctx
4731- benchlen = args .promptlimit
4730+ gpu0avram = int (MaxMemory [0 ]/ 1024 / 1024 )
4731+ gpu1avram = int (MaxMemory [1 ]/ 1024 / 1024 )
4732+ gpu2avram = int (MaxMemory [2 ]/ 1024 / 1024 )
4733+ gpu3avram = int (MaxMemory [3 ]/ 1024 / 1024 )
4734+ gpu0fvram = int (MaxFreeMemory [0 ]/ 1024 / 1024 )
4735+ gpu1fvram = int (MaxFreeMemory [1 ]/ 1024 / 1024 )
4736+ gpu2fvram = int (MaxFreeMemory [2 ]/ 1024 / 1024 )
4737+ gpu3fvram = int (MaxFreeMemory [3 ]/ 1024 / 1024 )
4738+ gpuavram = gpu0avram + gpu1avram + gpu2avram + gpu3avram
4739+ gpufvram = gpu0fvram + gpu1fvram + gpu2fvram + gpu3fvram
4740+ benchmaxctx = maxctx - 128
4741+ benchtg = args .promptlimit
4742+ benchpp = (benchmaxctx - benchtg )
47324743 benchtemp = 0.1
47334744 benchtopk = 1
47344745 benchreppen = 1
@@ -4743,8 +4754,8 @@ def onready_subprocess():
47434754 if not args .benchmark :
47444755 benchbaneos = False
47454756 if args .benchmark :
4746- if os .path .exists (args .benchmark ) and os .path .getsize (args .benchmark ) > 1000000 :
4747- print (f"\n Warning: The benchmark CSV output file you selected exceeds 1MB . This is probably not what you want, did you select the wrong CSV file?\n For safety, benchmark output will not be saved." )
4757+ if os .path .exists (args .benchmark ) and os .path .getsize (args .benchmark ) > 13000000 :
4758+ print (f"\n Warning: The benchmark CSV output file you selected exceeds 13MB . This is probably not what you want, did you select the wrong CSV file?\n For safety, benchmark output will not be saved." )
47484759 save_to_file = False
47494760 if save_to_file :
47504761 print (f"\n Running benchmark (Save to File: { args .benchmark } )..." )
@@ -4756,7 +4767,7 @@ def onready_subprocess():
47564767 benchprompt += benchprompt
47574768 genp = {
47584769 "prompt" :benchprompt ,
4759- "max_length" :benchlen ,
4770+ "max_length" :benchtg ,
47604771 "max_context_length" :benchmaxctx ,
47614772 "temperature" :benchtemp ,
47624773 "top_k" :benchtopk ,
@@ -4769,34 +4780,83 @@ def onready_subprocess():
47694780 restore_stdout ()
47704781 print (result )
47714782 if args .benchmark :
4772- result = (result [:8 ] if len (result )> 8 else "" ) if not args .prompt else result
4773- t_pp = float (handle .get_last_process_time ())* float (benchmaxctx - benchlen )* 0.001
4774- t_gen = float (handle .get_last_eval_time ())* float (benchlen )* 0.001
4775- s_pp = float (benchmaxctx - benchlen )/ t_pp
4776- s_gen = float (benchlen )/ t_gen
4783+ result = (result [:4 ] if len (result )> 4 else "" ) if not args .prompt else result
4784+ resultok = ((result == " 1 1" ) or (result == "1 1 " ))
4785+ t_pp = float (handle .get_last_process_time ())* float (benchpp )* 0.001
4786+ t_gen = float (handle .get_last_eval_time ())* float (benchtg )* 0.001
4787+ s_pp = float (benchpp )/ t_pp
4788+ s_gen = float (benchtg )/ t_gen
47774789 datetimestamp = datetime .now (timezone .utc )
4778- benchflagstr = f"NoAVX2={ args .noavx2 } Threads={ args .threads } HighPriority={ args .highpriority } Cublas_Args={ args .usecublas } Tensor_Split={ args .tensor_split } BlasThreads={ args .blasthreads } BlasBatchSize={ args .blasbatchsize } FlashAttention={ args .flashattention } KvCache={ args .quantkv } "
4790+
4791+ print (f"\n Bench Completed - v{ KcppVersion } ; LlamaCPP { LcppVersion } \n If Cuda mode: { CudaSpecifics } ; Release date: { ReleaseDate } ; Results:" )
4792+
4793+ benchflagstr = f"NoAVX2={ args .noavx2 } Threads={ args .threads } HighPriority={ args .highpriority } NoBlas={ args .noblas } Cublas_Args={ args .usecublas } Offloaded layers={ args .gpulayers } Tensor_Split={ args .tensor_split } BlasThreads={ args .blasthreads } BlasBatchSize={ args .blasbatchsize } FlashAttention={ args .flashattention } KvCache={ args .quantkv } "
47794794 print (f"\n Benchmark Completed - v{ KcppVersion } Results:\n ======" )
47804795 print (f"Flags: { benchflagstr } " )
47814796 print (f"Timestamp: { datetimestamp } " )
47824797 print (f"Backend: { libname } " )
47834798 print (f"Layers: { args .gpulayers } " )
47844799 print (f"Model: { benchmodel } " )
4785- print (f"MaxCtx: { benchmaxctx } " )
4786- print (f"GenAmount: { benchlen } \n -----" )
4800+ print (f"NoAVX2: { args .noavx2 } " )
4801+ print (f"NoBlas: { args .noblas } " )
4802+ print (f"NoMmap: { args .nommap } " )
4803+ print (f"HighPriority: { args .highpriority } " )
4804+ print (f"FlashAttention: { args .flashattention } " )
4805+ print (f"Threads: { args .threads } " )
4806+ CUDevicesNames .sort (reverse = True )
4807+ if gpu0avram > 0 :
4808+ print (f"GPU 0 Name: { CUDevicesNames [0 ]} " )
4809+ if gpu0avram > 0 :
4810+ print (f"GPU 0 VRAM: { gpu0avram } MiB" )
4811+ if gpu0fvram > 0 :
4812+ print (f"GPU 0 Free VRAM: { gpu0fvram } MiB" )
4813+ if gpu1avram > 0 :
4814+ print (f"GPU 1 Name: { CUDevicesNames [1 ]} " )
4815+ if gpu1avram > 0 :
4816+ print (f"GPU 1 VRAM: { gpu1avram } MiB" )
4817+ if gpu1fvram > 0 :
4818+ print (f"GPU 1 Free VRAM: { gpu1fvram } MiB" )
4819+ if gpu2avram > 0 :
4820+ print (f"GPU 2 Name: { CUDevicesNames [2 ]} " )
4821+ if gpu2avram > 0 :
4822+ print (f"GPU 2 VRAM: { gpu2avram } MiB" )
4823+ if gpu2fvram > 0 :
4824+ print (f"GPU 2 Free VRAM: { gpu2fvram } MiB" )
4825+ if gpu3avram > 0 :
4826+ print (f"GPU 3 Name: { CUDevicesNames [3 ]} " )
4827+ if gpu3avram > 0 :
4828+ print (f"GPU 3 VRAM: { gpu3avram } MiB" )
4829+ if gpu3fvram > 0 :
4830+ print (f"GPU 3 Free VRAM: { gpu3fvram } MiB" )
4831+ if gpuavram > gpu0avram :
4832+ print (f"GPUs Total VRAM: { gpuavram } MiB" )
4833+ if gpufvram > gpu0fvram :
4834+ print (f"GPUs Total Free VRAM: { gpufvram } MiB" )
4835+ print (f"Cublas_Args: { args .usecublas } " )
4836+ print (f"Layers: { args .gpulayers } " )
4837+ print (f"Tensor_Split: { args .tensor_split } " )
4838+ print (f"BlasThreads: { args .blasthreads } " )
4839+ print (f"Blas_nBatchSize: { args .blasbatchsize } " )
4840+ print (f"Blas_uBatchSize: { args .blasubatchsize } " )
4841+ print (f"KV_cache: { args .quantkv } " )
4842+ print (f"MaxCtx: { maxctx } \n -----" )
4843+ print (f"PPnum: { benchpp } " )
47874844 print (f"ProcessingTime: { t_pp :.3f} s" )
47884845 print (f"ProcessingSpeed: { s_pp :.2f} T/s" )
4846+ print (f"TGnum: { benchtg } " )
47894847 print (f"GenerationTime: { t_gen :.3f} s" )
47904848 print (f"GenerationSpeed: { s_gen :.2f} T/s" )
4849+ print (f"BenchmarkCtx: { benchmaxctx } " )
47914850 print (f"TotalTime: { (t_pp + t_gen ):.3f} s" )
4792- print (f"Output: { result } \n -----" )
4851+ print (f"Output: { result } " )
4852+ print (f"Coherent: { resultok } " )
47934853 if save_to_file :
47944854 try :
47954855 with open (args .benchmark , "a" ) as file :
47964856 file .seek (0 , 2 )
47974857 if file .tell () == 0 : #empty file
4798- file .write (f"Timestamp, Backend,Layers ,Model,MaxCtx,GenAmount,ProcessingTime,ProcessingSpeed,GenerationTime,GenerationSpeed, TotalTime,Output,Flags " )
4799- file .write (f"\n { datetimestamp } ,{ libname } ,{ args .gpulayers } ,{ benchmodel } ,{ benchmaxctx } ,{ benchlen } ,{ t_pp :.2f } ,{ s_pp :.2f} ,{ t_gen :.2f } ,{ s_gen :.2f} ,{ (t_pp + t_gen ):.2f } ,{ result } ,{ benchflagstr } " )
4858+ file .write (f"Datime,KCPPF,LCPP, Backend,CudaSpecifics ,Model,NoAvx2,NoBlas,NoMmap,HighP,FlashA,Thrd,VRAM,FVRAM0,Layers,BlasThrd,BBSizeN,BBSizeU,KVC,PPNum,PPTime,PPSpeed,TGNum,TGTime,TGSpeed,BenchCtx, TotalTime,Coher,Tensor1,Split2,Cublas1,Argument2,Argument3,Argument4 " )
4859+ file .write (f"\n { ReleaseDate } ,{ KcppVersion } , { LcppVersion } , { libname } ,{ CudaSpecifics } , { benchmodel } , { args .noavx2 } , { args . noblas } , { args . nommap } , { args . highpriority } , { args . flashattention } , { args . threads } , { gpuavram } , { gpu0fvram } , { args . gpulayers } ,{ args . blasthreads } ,{ args . blasbatchsize } ,{ args . blasubatchsize } ,{ args . quantkv } , { benchpp } , { t_pp :.3f } ,{ s_pp :.2f} ,{ benchtg } , { t_gen :.3f } ,{ s_gen :.2f} ,{ benchmaxctx } , { (t_pp + t_gen ):.3f } ,{ resultok } ,{ args . tensor_split } ,, { args . usecublas } ,,, " )
48004860 except Exception as e :
48014861 print (f"Error writing benchmark to file: { e } " )
48024862 global using_gui_launcher
0 commit comments