Skip to content

Commit b42ba36

Browse files
committed
Fix loading non-llama LLM via ios benchmark app
1 parent d12c6f6 commit b42ba36

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

.github/workflows/android-perf.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,9 @@ jobs:
319319
-d fp32 \
320320
-X \
321321
--xnnpack-extended-ops \
322-
-qmode 8da4w -G 32 -E 8,0 \
322+
-qmode 8da4w \
323+
-G 32 \
324+
-E 8,0 \
323325
--metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \
324326
--output_name="${OUT_ET_MODEL_NAME}.pte"
325327
ls -lh "${OUT_ET_MODEL_NAME}.pte"

.github/workflows/apple-perf.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,8 @@ jobs:
209209
210210
# Convert HF checkpoint to ET via etLLM path
211211
if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then
212+
# The benchmark app relies on the _llm suffix to determine whether the model is a LLM or not
213+
OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm
212214
# Llama models on Hugging Face
213215
if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then
214216
# SpinQuant
@@ -311,6 +313,7 @@ jobs:
311313
ls -lh "${OUT_ET_MODEL_NAME}.pte"
312314
fi
313315
elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then
316+
OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm
314317
if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then
315318
DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json")
316319
${CONDA_RUN} python -m examples.models.llama.export_llama \
@@ -321,7 +324,9 @@ jobs:
321324
-d fp32 \
322325
-X \
323326
--xnnpack-extended-ops \
324-
-qmode 8da4w -G 32 -E 8,0 \
327+
-qmode 8da4w \
328+
-G 32 \
329+
-E 8,0 \
325330
--metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \
326331
--output_name="${OUT_ET_MODEL_NAME}.pte"
327332
ls -lh "${OUT_ET_MODEL_NAME}.pte"
@@ -367,12 +372,13 @@ jobs:
367372
${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}"
368373
popd
369374
375+
# The benchmark app relies on the _llm suffix to determine whether the model is a LLM or not
376+
OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm
370377
mv model.pte ${OUT_ET_MODEL_NAME}.pte
371378
ls -lh "${OUT_ET_MODEL_NAME}.pte"
372379
fi
373380
374-
# zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.*
375-
zip -j model.zip ${OUT_ET_MODEL_NAME}.pte
381+
zip -j model.zip ${OUT_ET_MODEL_NAME}.pte ${DOWNLOADED_PATH}/tokenizer.*
376382
ls -lh model.zip
377383
mkdir -p "${ARTIFACTS_DIR_NAME}"
378384
mv model.zip "${ARTIFACTS_DIR_NAME}"

extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ @implementation LLaMATests
6060
+ (NSDictionary<NSString *, BOOL (^)(NSString *)> *)predicates {
6161
return @{
6262
@"model" : ^BOOL(NSString *filename){
63-
return [filename hasSuffix:@".pte"] && [filename.lowercaseString containsString:@"llama"];
63+
return [filename hasSuffix:@".pte"] && [filename.lowercaseString containsString:@"llm"];
6464
},
6565
@"tokenizer" : ^BOOL(NSString *filename) {
6666
return [filename isEqual:@"tokenizer.bin"] || [filename isEqual:@"tokenizer.model"] || [filename isEqual:@"tokenizer.json"];

0 commit comments

Comments
 (0)