#!/bin/bash
set -e

-MODELS_REPO=https://huggingface.co/ltoniazzi/reduce-llms-for-testing
+MODELS_REPO=reduce-llms-for-testing
+MODELS_REPO_URL=https://huggingface.co/ltoniazzi/$MODELS_REPO

# Clone the Hugging Face repository if the directory does not exist
-if [ ! -d "reduce-llms-for-testing" ]; then
+if [ ! -d "$MODELS_REPO" ]; then
    echo "Cloning the Hugging Face repository..."
-    git clone $MODELS_REPO
+    git clone $MODELS_REPO_URL
else
    echo "Repository already exists. Skipping clone."
fi

@@ -17,54 +18,55 @@ results=()
run_conversion_and_inference_lora () {
    local model_name=$1
    local size_matrix=$2
-    local model_size_mb=$3
+    local bos_token=$3

    # Convert safetensors to gguf
    echo "Running convert_hf_to_gguf.py for $model_name with size $size_matrix..."
-    python convert_hf_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/base \
+    python convert_hf_to_gguf.py $MODELS_REPO/$model_name/size=$size_matrix/base \
+        --outfile $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
        --outtype f32

    echo "Running convert_lora_to_gguf.py for $model_name with size $size_matrix..."
-    python3 convert_lora_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/lora \
-        --base reduce-llms-for-testing/$model_name/size=$size_matrix/base \
+    python3 convert_lora_to_gguf.py $MODELS_REPO/$model_name/size=$size_matrix/lora \
+        --base $MODELS_REPO/$model_name/size=$size_matrix/base \
        --outtype f32

-    echo "Running llama-export-lora with lora for $model_name with size $size_matrix and model size $model_size_mb..."
+    echo "Running llama-export-lora with lora for $model_name with size $size_matrix..."
    llama-export-lora \
-        -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
+        -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        -o $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32-lora-merged.gguf \
+        --lora $MODELS_REPO/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf

    # Run inference
-    echo "Running llama-cli without lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_BASE=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -p "<bos> When forty winters shall besiege" -n 50 --seed 42)
+    echo "Running llama-cli without lora for $model_name with size $size_matrix..."
+    OUTPUT_BASE=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        -p "When forty winters shall besiege" -n 50 --seed 42)

-    echo "Running llama-cli with lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_LORA_HOT=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
-        -p "<bos> I see a little silhouetto" -n 50 --seed 42)
+    echo "Running llama-cli with lora for $model_name with size $size_matrix..."
+    OUTPUT_LORA_HOT=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        --lora $MODELS_REPO/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
+        -p "I see a little silhouetto" -n 50 --seed 42)

-    echo "Running llama-cli with exported lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_LORA_MERGED=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        -p "<bos> I see a little silhouetto" -n 50 --seed 42)
+    echo "Running llama-cli with exported lora for $model_name with size $size_matrix..."
+    OUTPUT_LORA_MERGED=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32-lora-merged.gguf \
+        -p "I see a little silhouetto" -n 50 --seed 42)

    # Store the results in the regular array
    results+=("
-    \n\n\033[1mResults for $model_name with size $size_matrix and model size $model_size_mb:\033[0m
+    \n\033[1mResults for $model_name with size $size_matrix:\033[0m
    \n • \033[32mBase:\n$OUTPUT_BASE
    \n • \033[34mLora hot:\n$OUTPUT_LORA_HOT
    \n • \033[36mLora merged:\n$OUTPUT_LORA_MERGED
-    \n\n\033[0m
+    \n\033[0m
    ")

-    echo "All steps completed for $model_name with size $size_matrix and model size $model_size_mb!"
+    echo "All steps completed for $model_name with size $size_matrix!"
}

# Array of parameters to iterate over
declare -a params=(
-    "Gemma2ForCausalLM 64 19M"
-    # "AnotherModel 128 25M"
+    "Gemma2ForCausalLM 64 <bos>"
+    "LlamaForCausalLM 64 <|begin_of_text|>"
)

# Loop through each set of parameters
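The loop body itself is outside this hunk; as a rough, hypothetical sketch (not part of the diff), it could split each params entry into the three whitespace-separated fields the function expects, with variable names here chosen only to mirror the function's positional arguments:

for param in "${params[@]}"; do
    # Split "ModelName hidden_size bos_token" into its three fields
    read -r model_name size_matrix bos_token <<< "$param"
    run_conversion_and_inference_lora "$model_name" "$size_matrix" "$bos_token"
done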