You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
(time ./bin/llama-cli -no-cnv --model ${model_bf16} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-bf16.log
376
-
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q8_0.log
377
-
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_0.log
378
-
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_1.log
379
-
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_0.log
380
-
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_1.log
381
-
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q2_k.log
382
-
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q3_k.log
383
-
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_k.log
384
-
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_k.log
385
-
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q6_k.log
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 2048 -fa off ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
402
-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 2048 -fa on ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
403
-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 2048 -fa off ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
404
-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 2048 -fa on ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
377
+
(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-f16.log
378
+
(time ./bin/llama-cli -no-cnv --model ${model_bf16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-bf16.log
379
+
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q8_0.log
380
+
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_0.log
381
+
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_1.log
382
+
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_0.log
383
+
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_1.log
384
+
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q2_k.log
385
+
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q3_k.log
386
+
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_k.log
387
+
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_k.log
388
+
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q6_k.log
389
+
390
+
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-f16.log
391
+
if [ -z ${GG_BUILD_NO_BF16} ]; then
392
+
(time ./bin/llama-perplexity --model ${model_bf16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-bf16.log
393
+
fi
394
+
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q8_0.log
395
+
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q4_0.log
396
+
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q4_1.log
397
+
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q5_0.log
398
+
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q5_1.log
399
+
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q2_k.log
400
+
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q3_k.log
401
+
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q4_k.log
402
+
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q5_k.log
403
+
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q6_k.log
404
+
405
+
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-imatrix.log
406
+
407
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa off ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
408
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa on ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
409
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa off ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
410
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa on ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
405
411
406
412
function check_ppl {
407
413
qnt="$1"
@@ -416,7 +422,10 @@ function gg_run_qwen3_0_6b {
416
422
return 0
417
423
}
418
424
419
-
check_ppl "bf16" "$(cat $OUT/${ci}-tg-bf16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
425
+
check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
426
+
if [ -z ${GG_BUILD_NO_BF16} ]; then
427
+
check_ppl "bf16" "$(cat $OUT/${ci}-tg-bf16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
428
+
fi
420
429
check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
421
430
check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
422
431
check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
0 commit comments