@@ -10,26 +10,28 @@
 fi


-run_llama_cli() {
+run_conversion_and_inference_lora() {
     local model_name=$1
-    local size=$2
-    local model_size=$3
+    local size_matrix=$2
+    local model_size_mb=$3

-    echo "Running convert_hf_to_gguf.py for $model_name with size $size..."
-    python convert_hf_to_gguf.py reduce-llms-for-testing/$model_name/size=$size/base --outtype f32
+    echo "Running convert_hf_to_gguf.py for $model_name with size $size_matrix..."
+    python convert_hf_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/base --outtype f32

-    echo "Running convert_lora_to_gguf.py for $model_name with size $size..."
-    python3 convert_lora_to_gguf.py reduce-llms-for-testing/$model_name/size=$size/lora --base reduce-llms-for-testing/$model_name/size=$size/base --outtype f32
+    echo "Running convert_lora_to_gguf.py for $model_name with size $size_matrix..."
+    python3 convert_lora_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/lora --base reduce-llms-for-testing/$model_name/size=$size_matrix/base --outtype f32

-    echo "Running llama-cli without lora for $model_name with size $size and model size $model_size..."
-    llama-cli -m reduce-llms-for-testing/$model_name/size=$size/base/Base-$model_size-F32.gguf -p "<bos>When forty winters shall besiege" -n 50
+    echo "Running llama-cli without lora for $model_name with size $size_matrix and model size $model_size_mb..."
+    llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf -p "<bos>When forty winters shall besiege" -n 50

-    echo "Running llama-cli with lora for $model_name with size $size and model size $model_size..."
-    llama-cli -m reduce-llms-for-testing/$model_name/size=$size/base/Base-$model_size-F32.gguf --lora reduce-llms-for-testing/$model_name/size=$size/lora/Lora-F32-LoRA.gguf -p "<bos>I see a " -n 50
+    echo "Running llama-cli with lora for $model_name with size $size_matrix and model size $model_size_mb..."
+    llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf -p "<bos>I see a " -n 50

-    echo "All steps completed for $model_name with size $size and model size $model_size!"
+    # TODO: add merge lora with llama-export-lora and check
+
+    echo "All steps completed for $model_name with size $size_matrix and model size $model_size_mb!"
 }

 # Example usage:
-run_llama_cli "Gemma2ForCausalLM" "64" "19M"
+run_conversion_and_inference_lora "Gemma2ForCausalLM" "64" "19M"
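
The TODO above leaves the adapter-merge check unimplemented. A minimal sketch of how that step could slot into the function body, assuming the llama-export-lora binary is on PATH and reusing the directory layout from the surrounding calls; the -lora-merged output filename is a placeholder, not taken from the PR:

```sh
# Sketch for the TODO: fold the adapter into the base weights with
# llama-export-lora, then run the merged model without --lora.
# The output filename is illustrative.
echo "Merging lora into base model for $model_name with size $size_matrix..."
llama-export-lora \
    -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
    --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
    -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf

# The merged model should behave like base + --lora; comparing its output
# against the llama-cli --lora run above would cover the "check" part.
llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
    -p "<bos>I see a " -n 50
```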
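
Since the parameter was renamed to size_matrix, the call site presumably grows into a sweep over several reduced sizes. A sketch of what that could look like; every size/model-size pair other than 64/19M is an illustrative placeholder:

```sh
# Hypothetical sweep over the size matrix. Each entry pairs a hidden size
# with the model size used in the GGUF filename; only "64:19M" comes from
# the PR, the second entry is a placeholder.
for entry in "64:19M" "128:52M"; do
    size=${entry%%:*}
    model_size=${entry##*:}
    run_conversion_and_inference_lora "Gemma2ForCausalLM" "$size" "$model_size"
done
```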