@@ -19,6 +19,7 @@ run_conversion_and_inference_lora() {
1919 local size_matrix=$2
2020 local model_size_mb=$3
2121
22+ # Convert safetensors to gguf
2223 echo " Running convert_hf_to_gguf.py for $model_name with size $size_matrix ..."
2324 python convert_hf_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/base \
2425 --outtype f32
@@ -28,6 +29,13 @@ run_conversion_and_inference_lora() {
2829 --base reduce-llms-for-testing/$model_name/size=$size_matrix/base \
2930 --outtype f32
3031
32+ echo " Running llama-export-lora with lora for $model_name with size $size_matrix and model size $model_size_mb ..."
33+ llama-export-lora \
34+ -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
35+ -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
36+ --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
37+
38+ # Run inference
3139 echo " Running llama-cli without lora for $model_name with size $size_matrix and model size $model_size_mb ..."
3240 OUTPUT_BASE=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
3341 -p "<bos>When forty winters shall besiege" -n 50 --seed 42)
@@ -37,24 +45,17 @@ run_conversion_and_inference_lora() {
3745 --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
3846 -p "<bos>I see a little silhouetto" -n 50 --seed 42)
3947
40- # TODO add merge lora with lora-export and check
41- echo " Running llama-export-lora with lora for $model_name with size $size_matrix and model size $model_size_mb ..."
42- llama-export-lora \
43- -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
44- -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
45- --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
46-
4748 echo " Running llama-cli with exported lora for $model_name with size $size_matrix and model size $model_size_mb ..."
4849 OUTPUT_LORA_MERGED=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
4950 -p "<bos>I see a little silhouetto" -n 50 --seed 42)
5051
5152 # Store the results in the regular array
5253 results+=("
53- \n\n\n\033[1mResults for $model_name with size $size_matrix and model size $model_size_mb :\033[0m
54- \n • \033[32mBase:\n $OUTPUT_BASE
55- \n • \033[34mLora hot:\n $OUTPUT_LORA_HOT
56- \n • \033[36mLora merged:\n $OUTPUT_LORA_MERGED
57- \n\n\n \033[0m
54+ \n\n\033[1mResults for $model_name with size $size_matrix and model size $model_size_mb :\033[0m
55+ \n • \033[32mBase:\n$OUTPUT_BASE
56+ \n • \033[34mLora hot:\n$OUTPUT_LORA_HOT
57+ \n • \033[36mLora merged:\n$OUTPUT_LORA_MERGED
58+ \n\n \033[0m
5859 " )
5960
6061 echo " All steps completed for $model_name with size $size_matrix and model size $model_size_mb !"
0 commit comments