You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
(time ./bin/llama-cli -no-cnv --model ${model_bf16} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-bf16.log
376
-
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q8_0.log
377
-
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_0.log
378
-
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_1.log
379
-
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_0.log
380
-
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_1.log
381
-
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q2_k.log
382
-
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q3_k.log
383
-
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_k.log
384
-
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_k.log
385
-
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 2048 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q6_k.log
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 2048 -fa off ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
402
-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 2048 -fa on ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
403
-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 2048 -fa off ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
404
-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 2048 -fa on ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
377
+
(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-f16.log
378
+
(time ./bin/llama-cli -no-cnv --model ${model_bf16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-bf16.log
379
+
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q8_0.log
380
+
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_0.log
381
+
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_1.log
382
+
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_0.log
383
+
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_1.log
384
+
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q2_k.log
385
+
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q3_k.log
386
+
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q4_k.log
387
+
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q5_k.log
388
+
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1| tee -a $OUT/${ci}-tg-q6_k.log
389
+
390
+
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-f16.log
391
+
(time ./bin/llama-perplexity --model ${model_bf16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-bf16.log
392
+
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q8_0.log
393
+
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q4_0.log
394
+
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q4_1.log
395
+
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q5_0.log
396
+
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q5_1.log
397
+
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q2_k.log
398
+
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q3_k.log
399
+
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q4_k.log
400
+
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q5_k.log
401
+
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-tg-q6_k.log
402
+
403
+
(time ./bin/llama-imatrix --model ${model_bf16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1| tee -a $OUT/${ci}-imatrix.log
404
+
405
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa off ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
406
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa on ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
407
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa off ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
408
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa on ) 2>&1| tee -a $OUT/${ci}-save-load-state.log
405
409
406
410
function check_ppl {
407
411
qnt="$1"
@@ -416,6 +420,7 @@ function gg_run_qwen3_0_6b {
416
420
return 0
417
421
}
418
422
423
+
check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
419
424
check_ppl "bf16" "$(cat $OUT/${ci}-tg-bf16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
420
425
check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
421
426
check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
0 commit comments