@@ -55,7 +55,7 @@ cmake_build_llama_runner
# Constants.
RUNTIME_ARGS="--tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
PROMPT="What happens if you eat watermelon seeds?"
-EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C, "
+EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C and "

# Export LoRA PTE file.
MODEL_NAME="llama_3_2_1B_lora"
@@ -94,7 +94,7 @@
  exit 1
fi

-# Export LoRA PTE, PTD file.
+# Export LoRA PTE, foundation PTD file.
MODEL_SEPARATE="${MODEL_NAME}_separate"
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
  base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
@@ -114,20 +114,62 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
-cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_path=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_paths=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
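+# Note: --data_paths (plural) replaces --data_path; the third test below passes it a comma-separated list of .ptd files.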
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT2=$(cat result2.txt)
if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT2}"
+  # Do not clean up files if test passes, as they're re-used in the next test.
  echo "Success"
-  cleanup_files
else
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT2}"
  echo "Failure; results not the same"
  cleanup_files
  exit 1
fi
+
+# Export LoRA PTE, LoRA PTD, foundation PTD file.
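+# This variant emits three artifacts: the program (.pte), the LoRA adapter weights (.ptd), and the foundation weights (.ptd).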
+MODEL_PROGRAM_ONLY="${MODEL_NAME}_program"
+MODEL_LORA_WEIGHTS="lora_weights"
+MODEL_FOUNDATION_WEIGHTS="foundation_weights"
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+  base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+  base.params="${DOWNLOADED_PATH}/params.json" \
+  base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+  base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+  base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+  model.use_kv_cache=true \
+  model.use_sdpa_with_kv_cache=true \
+  model.dtype_override="fp32" \
+  backend.xnnpack.enabled=true \
+  backend.xnnpack.extended_ops=true \
+  export.output_name="${MODEL_PROGRAM_ONLY}.pte" \
+  export.foundation_weights_file="${MODEL_FOUNDATION_WEIGHTS}.ptd" \
+  export.lora_weights_file="${MODEL_LORA_WEIGHTS}.ptd"
+
+# Run llama runner.
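+# The runner takes the program (.pte) plus both weight files; foundation and LoRA weights are loaded from the .ptd files at runtime.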
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_PROGRAM_ONLY}.pte --data_paths="${MODEL_FOUNDATION_WEIGHTS}.ptd,${MODEL_LORA_WEIGHTS}.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result3.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT3=$(cat result3.txt)
+if [[ "${RESULT3}" == "${EXPECTED_PREFIX}"* ]]; then
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Success"
+else
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Failure; results not the same"
+  cleanup_files
+  exit 1
+fi
+
+cleanup_files