
Commit 0fc5801

Benchmark Gemma-3

Author: Guang Yang (committed)
1 parent 9c9f665 commit 0fc5801

File tree

5 files changed (+10, -37 lines)


.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 1 addition & 1 deletion

@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }}
       devices: samsung_galaxy_s22_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
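The `models` expression above uses the GitHub Actions `||`/`&&` short-circuit idiom as a ternary: an explicit dispatch input wins, scheduled runs get the full list (now including Gemma-3), and every other trigger falls back to google/gemma-3-1b-it. A minimal bash sketch of that resolution logic (the function and variable names here are illustrative, not part of the workflow):

    # Illustrative only: mirrors the short-circuit logic of the `models` expression.
    # resolve_models and its arguments are hypothetical names, not workflow code.
    resolve_models() {
      local inputs_models="$1" event_name="$2"
      if [ -n "${inputs_models}" ]; then
        # An explicit workflow_dispatch input always wins.
        echo "${inputs_models}"
      elif [ "${event_name}" = "schedule" ]; then
        # Nightly runs benchmark the full list, now including Gemma-3.
        echo "Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it"
      else
        # All other triggers fall back to google/gemma-3-1b-it.
        echo "google/gemma-3-1b-it"
      fi
    }

    resolve_models "" "schedule"          # -> full scheduled list
    resolve_models "" "workflow_dispatch" # -> google/gemma-3-1b-it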

.github/workflows/android-perf.yml

Lines changed: 3 additions & 12 deletions

@@ -72,7 +72,7 @@ jobs:
   # Separate default values from the workflow dispatch. To ensure defaults are accessible
   # during scheduled runs and to provide flexibility for different defaults between
   # on-demand and periodic benchmarking.
-  CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+  CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
   CRON_DEFAULT_DEVICES: samsung_galaxy_s22
   run: |
     set -eux

@@ -344,7 +344,7 @@ jobs:
   git clone https://github.com/huggingface/optimum-executorch
   pushd optimum-executorch
   # There is no release yet, for CI stability, always test from the same commit on main
-  git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+  git checkout c2b20c9cec66655ce42a75636002b4176aa9644a
   python install_dev.py --skip_override_torch
   pip list

@@ -353,21 +353,12 @@ jobs:
     "--task" "text-generation"
     "--recipe" "xnnpack"
     "--use_custom_sdpa"
+    "--use_custom_kv_cache"
     "--qlinear"
     "--qembedding"
     "--output_dir" ".."
   )

-  # Add conditional arguments based on model
-  case "${HF_MODEL_REPO}" in
-    *"google/gemma-3-1b-it"*)
-      echo "--use_custom_kv_cache can not be used for HybridCache"
-      ;;
-    *)
-      ARGS+=("--use_custom_kv_cache")
-      ;;
-  esac
-
   optimum-cli export executorch "${ARGS[@]}"
   popd
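With the per-model case statement removed, every model in the benchmark matrix is exported with the same flags, including --use_custom_kv_cache for Gemma-3. A sketch of the assembled export step after this change, assuming HF_MODEL_REPO is google/gemma-3-1b-it and that a "--model" argument is added to ARGS earlier in the script, outside the lines shown in the hunk:

    # Sketch of the export step after this change; "--model" is an assumption
    # about lines of the workflow script not shown in the hunk above.
    HF_MODEL_REPO="google/gemma-3-1b-it"
    ARGS=(
      "--model" "${HF_MODEL_REPO}"
      "--task" "text-generation"
      "--recipe" "xnnpack"
      "--use_custom_sdpa"
      "--use_custom_kv_cache"   # now passed unconditionally, Gemma-3 included
      "--qlinear"
      "--qembedding"
      "--output_dir" ".."
    )
    optimum-cli export executorch "${ARGS[@]}"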

.github/workflows/apple-perf-private-device-experiment.yml

Lines changed: 1 addition & 1 deletion

@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }}
       devices: apple_iphone_15_private
       benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/apple-perf.yml

Lines changed: 3 additions & 12 deletions

@@ -72,7 +72,7 @@ jobs:
   # Separate default values from the workflow dispatch. To ensure defaults are accessible
   # during scheduled runs and to provide flexibility for different defaults between
   # on-demand and periodic benchmarking.
-  CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+  CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
   CRON_DEFAULT_DEVICES: apple_iphone_15
   run: |
     set -eux

@@ -349,7 +349,7 @@ jobs:
   git clone https://github.com/huggingface/optimum-executorch
   pushd optimum-executorch
   # There is no release yet, for CI stability, always test from the same commit on main
-  git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+  git checkout c2b20c9cec66655ce42a75636002b4176aa9644a
   ${CONDA_RUN} python install_dev.py --skip_override_torch
   pip list

@@ -358,21 +358,12 @@ jobs:
     "--task" "text-generation"
     "--recipe" "xnnpack"
     "--use_custom_sdpa"
+    "--use_custom_kv_cache"
     "--qlinear"
     "--qembedding"
     "--output_dir" ".."
   )

-  # Add conditional arguments based on model
-  case "${HF_MODEL_REPO}" in
-    *"google/gemma-3-1b-it"*)
-      echo "--use_custom_kv_cache can not be used for HybridCache"
-      ;;
-    *)
-      ARGS+=("--use_custom_kv_cache")
-      ;;
-  esac
-
   ${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}"
   popd

.github/workflows/trunk.yml

Lines changed: 2 additions & 11 deletions

@@ -597,7 +597,7 @@ jobs:
   git clone https://github.com/huggingface/optimum-executorch
   pushd optimum-executorch
   # There is no release yet, for CI stability, always test from the same commit on main
-  git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+  git checkout c2b20c9cec66655ce42a75636002b4176aa9644a
   python install_dev.py --skip_override_torch
   popd
   pip list

@@ -614,21 +614,12 @@ jobs:
     "--task" "text-generation"
     "--recipe" "xnnpack"
     "--use_custom_sdpa"
+    "--use_custom_kv_cache"
     "--qlinear"
     "--qembedding"
     "--output_dir" "${OUTPUT_DIR}"
   )

-  # Add conditional arguments based on model
-  case "${MODEL_ID}" in
-    *"google/gemma-3-1b-it"*)
-      echo "--use_custom_kv_cache can not be used for HybridCache"
-      ;;
-    *)
-      ARGS+=("--use_custom_kv_cache")
-      ;;
-  esac
-
   optimum-cli export executorch "${ARGS[@]}"

   ls -FlAGhp ${OUTPUT_DIR}
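All three workflows pin the same optimum-executorch commit, since there is no release yet. For reference, a sketch of reproducing that pinned setup locally, assembled from the hunks above rather than copied from any single workflow:

    # Local reproduction of the CI setup; the commit hash is the one the
    # workflows now check out on main.
    git clone https://github.com/huggingface/optimum-executorch
    cd optimum-executorch
    git checkout c2b20c9cec66655ce42a75636002b4176aa9644a
    python install_dev.py --skip_override_torch
    pip list
    cd ..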
