Commit c0cccb6

Merge remote-tracking branch 'origin/main' into audio-input-test
2 parents 89fdd72 + 73b3303

363 files changed: +17432, -2526 lines
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-828ae02053a6e0e20a2dfd6e737ba10c6f4dee6b
+bd06b54e627fbfd354a2cffa4c80fb21883209a9

.ci/scripts/test_backend_linux.sh renamed to .ci/scripts/test_backend.sh

Lines changed: 20 additions & 7 deletions
@@ -10,16 +10,26 @@ SUITE=$1
 FLOW=$2
 ARTIFACT_DIR=$3
 
-REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.json"
 
 echo "Running backend test job for suite $SUITE, flow $FLOW."
 echo "Saving job artifacts to $ARTIFACT_DIR."
 
-# The generic Linux job chooses to use base env, not the one setup by the image
 eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 conda activate "${CONDA_ENV}"
 
+if [[ "$(uname)" == "Darwin" ]]; then
+  bash .ci/scripts/setup-conda.sh
+  eval "$(conda shell.bash hook)"
+  CONDA_RUN_CMD="${CONDA_RUN} --no-capture-output"
+  ${CONDA_RUN_CMD} pip install awscli==1.37.21
+  IS_MACOS=1
+else
+  CONDA_RUN_CMD=""
+  IS_MACOS=0
+fi
+
 export PYTHON_EXECUTABLE=python
 
 # CMake options to use, in addition to the defaults.
@@ -50,11 +60,14 @@ if [[ "$FLOW" == *arm* ]]; then
   .ci/scripts/setup-arm-baremetal-tools.sh
 fi
 
-# We need the runner to test the built library.
-PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
+if [[ $IS_MACOS -eq 1 ]]; then
+  SETUP_SCRIPT=.ci/scripts/setup-macos.sh
+else
+  SETUP_SCRIPT=.ci/scripts/setup-linux.sh
+fi
+CMAKE_ARGS="$EXTRA_BUILD_ARGS" ${CONDA_RUN_CMD} $SETUP_SCRIPT --build-tool cmake --build-mode Release --editable true
 
 EXIT_CODE=0
-python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$REPORT_FILE" || EXIT_CODE=$?
-
+${CONDA_RUN_CMD} pytest -c /dev/nul -n auto backends/test/suite/$SUITE/ -m flow_$FLOW --json-report --json-report-file="$REPORT_FILE" || EXIT_CODE=$?
 # Generate markdown summary.
-python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE
+${CONDA_RUN_CMD} python -m executorch.backends.test.suite.generate_markdown_summary_json "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE
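With the rename, one script now serves both platforms: every command is prefixed with $CONDA_RUN_CMD, which wraps macOS commands in the CI image's ${CONDA_RUN} launcher and expands to nothing on Linux, and results are collected via pytest's --json-report flags (evidently the pytest-json-report plugin) instead of the old CSV runner. A minimal sketch of the dispatch pattern, assuming CONDA_RUN is supplied by the job environment as in the real script:

    #!/bin/bash
    # Sketch of the platform dispatch used above; illustrative only.
    if [[ "$(uname)" == "Darwin" ]]; then
      # On macOS, run everything inside the conda env via the CI-provided
      # ${CONDA_RUN} wrapper (assumed to be set by the job environment).
      CONDA_RUN_CMD="${CONDA_RUN} --no-capture-output"
    else
      # On Linux the env is already activated, so the prefix is empty.
      CONDA_RUN_CMD=""
    fi

    # Unquoted expansion: an empty prefix disappears entirely, so the same
    # line works on both platforms.
    ${CONDA_RUN_CMD} python --version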

.ci/scripts/test_backend_macos.sh

Lines changed: 0 additions & 30 deletions
This file was deleted.

.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 114 additions & 8 deletions
@@ -43,7 +43,9 @@ def cli_export(command, model_dir):
 
 
 def check_causal_lm_output_quality(
-    model_id: str, generated_tokens: List[int], max_perplexity_threshold: float = 100.0
+    model_id: str,
+    generated_tokens: List[int],
+    max_perplexity_threshold: float = 100.0,
 ):
     """
     Evaluates the quality of text generated by a causal language model by calculating its perplexity.
@@ -58,12 +60,24 @@ def check_causal_lm_output_quality(
     """
     logging.info(f"Starting perplexity check with model '{model_id}' ...")
     # Load model
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        low_cpu_mem_usage=True,
-        use_cache=False,
-        torch_dtype=torch.bfloat16,
-    )
+    cls_name = AutoModelForCausalLM
+    if "llava" in model_id:
+        from transformers import LlavaForConditionalGeneration
+
+        cls_name = LlavaForConditionalGeneration
+    try:
+        model = cls_name.from_pretrained(
+            model_id,
+            low_cpu_mem_usage=True,
+            use_cache=False,
+            torch_dtype=torch.bfloat16,
+        )
+    except TypeError:
+        model = cls_name.from_pretrained(
+            model_id,
+            low_cpu_mem_usage=True,
+            torch_dtype=torch.bfloat16,
+        )
 
     with torch.no_grad():
         outputs = model(input_ids=generated_tokens, labels=generated_tokens)
@@ -156,6 +170,86 @@ def test_text_generation(model_id, model_dir, recipe, *, quantize=True, run_only
     assert check_causal_lm_output_quality(model_id, generated_tokens) is True
 
 
+def test_llm_with_image_modality(
+    model_id, model_dir, recipe, *, quantize=True, run_only=False
+):
+    command = [
+        "optimum-cli",
+        "export",
+        "executorch",
+        "--model",
+        model_id,
+        "--task",
+        "multimodal-text-to-text",
+        "--recipe",
+        recipe,
+        "--output_dir",
+        model_dir,
+        "--use_custom_sdpa",
+        "--use_custom_kv_cache",
+        "--qlinear",
+        "8da4w",
+        "--qembedding",
+        "8w",
+    ]
+    if not run_only:
+        cli_export(command, model_dir)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.save_pretrained(model_dir)
+
+    # input
+    processor = AutoProcessor.from_pretrained(model_id)
+    image_url = "https://llava-vl.github.io/static/images/view.jpg"
+    conversation = [
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
+                }
+            ],
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "url": image_url},
+                {
+                    "type": "text",
+                    "text": "What are the things I should be cautious about when I visit here?",
+                },
+            ],
+        },
+    ]
+    inputs = processor.apply_chat_template(
+        conversation,
+        add_generation_prompt=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt",
+    )
+
+    from executorch.extension.llm.runner import GenerationConfig, MultimodalRunner
+
+    runner = MultimodalRunner(f"{model_dir}/model.pte", f"{model_dir}/tokenizer.model")
+    generated_text = runner.generate_text_hf(
+        inputs,
+        GenerationConfig(max_new_tokens=128, temperature=0, echo=False),
+        processor.image_token_id,
+    )
+    print(f"\nGenerated text:\n\t{generated_text}")
+    # Free memory before loading eager for quality check
+    del runner
+    gc.collect()
+    assert (
+        check_causal_lm_output_quality(
+            model_id, tokenizer.encode(generated_text, return_tensors="pt")
+        )
+        is True
+    )
+
+
 def test_fill_mask(model_id, model_dir, recipe, *, quantize=True, run_only=False):
     command = [
         "optimum-cli",
@@ -353,6 +447,9 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
     required=False,
     help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
 )
+parser.add_argument(
+    "--run_only", action="store_true", help="Skip export and only run the test"
+)
 args = parser.parse_args()
 
 _text_generation_mapping = {
@@ -384,8 +481,16 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
     "vit": ("google/vit-base-patch16-224", test_vit),
 }
 
+_multimodal_model_mapping = {
+    "gemma3-4b": ("google/gemma-3-4b-it", test_llm_with_image_modality),
+    "llava": ("llava-hf/llava-1.5-7b-hf", test_llm_with_image_modality),
+}
+
 model_to_model_id_and_test_function = (
-    _text_generation_mapping | _mask_fill_mapping | _misc_model_mapping
+    _text_generation_mapping
+    | _mask_fill_mapping
+    | _misc_model_mapping
+    | _multimodal_model_mapping
 )
 
 if args.model not in model_to_model_id_and_test_function:
@@ -400,4 +505,5 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
         model_dir=tmp_dir if args.model_dir is None else args.model_dir,
         recipe=args.recipe,
         quantize=args.quantize,
+        run_only=args.run_only,
     )
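The new --run_only switch lets CI re-run generation and the perplexity check against a previously exported artifact without repeating the slow optimum-cli export. A hypothetical invocation; only --run_only and --model_dir are visible in these hunks, so the other flag spellings are inferred from the args.* destinations:

    # First pass: export llava and keep the artifacts in a fixed directory.
    python .ci/scripts/test_huggingface_optimum_model.py \
      --model llava --recipe xnnpack --model_dir ./llava-out

    # Second pass: skip the export, reuse ./llava-out/model.pte, and only
    # run generation plus the perplexity check.
    python .ci/scripts/test_huggingface_optimum_model.py \
      --model llava --recipe xnnpack --model_dir ./llava-out --run_only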

.ci/scripts/test_llama.sh

Lines changed: 2 additions & 1 deletion
@@ -159,6 +159,7 @@ cmake_install_executorch_libraries() {
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -DEXECUTORCH_BUILD_QNN="$QNN" \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
   cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }
@@ -236,7 +237,7 @@ if [[ "${CUSTOM}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} model.use_sdpa_with_kv_cache=true"
 fi
 if [[ "${QE}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} quantization.embedding_quantize=\"8,1024\""
+  EXPORT_ARGS="${EXPORT_ARGS} quantization.embedding_quantize=\"8,768\""
 fi
 if [[ "${MPS}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} backend.mps.enabled=true model.enable_dynamic_shape=false debug.verbose=true"

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 1 deletion
@@ -107,7 +107,7 @@ cmake_build_llava_runner_for_android() {
 # only export the one without custom op for now since it's
 export_llava() {
   echo "Starting to export Llava. This will take about 6 mins"
-  $PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
+  $PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts --max-context-len 768
 }
 
 # Download a new image
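The added --max-context-len 768 presumably caps the context length baked into llava.pte at export time, bounding the preallocated KV-cache buffers. Purely illustrative, a local re-export with a different cap would look like:

    # Hypothetical: same export with a 1024-token context cap instead.
    python -m executorch.examples.models.llava.export_llava \
      --pte-name llava.pte --with-artifacts --max-context-len 1024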
Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -euo pipefail
+
+echo ">>> Script invoked with arguments: $@"
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+# Download QNN_SDK. If already downloaded, export environment path
+source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
+install_qnn
+
+export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
+export PYTHONPATH=".."
+cp schema/program.fbs exir/_serialize/program.fbs
+cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+which "${PYTHON_EXECUTABLE}"
+
+# -------------------------------
+# Parse args
+# -------------------------------
+EXTRA_FLAGS=""
+THRESHOLD=62.0 # default fallback
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --flags)
+      EXTRA_FLAGS="$2"
+      shift 2
+      ;;
+    --threshold)
+      THRESHOLD="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+done
+
+# Config
+PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
+MODEL="qwen2_5-0_5b"
+MAX_SEQ=1024
+PTQ="16a4w"
+
+EXTRA_FLAGS="$@"
+
+# Run command and capture *both stdout and stderr*
+LOG_FILE="eval_${MODEL}_$(date +%Y%m%d_%H%M%S).log"
+
+echo ">>> Running evaluation with flags: $EXTRA_FLAGS | threshold: $THRESHOLD"
+$PYTHON_EXECUTABLE -m executorch.examples.qualcomm.oss_scripts.llama.eval_llama_qnn \
+  --decoder_model "$MODEL" \
+  --quant_linear_only \
+  --max_seq_length "$MAX_SEQ" \
+  --ptq "$PTQ" \
+  $EXTRA_FLAGS 2>&1 | tee "$LOG_FILE"
+
+# Extract last word_perplexity
+LAST_PERP=$(grep "INFO:root:wikitext:" "$LOG_FILE" | tail -n 1 | sed -E "s/.*'word_perplexity,none': ([0-9.]+).*/\1/")
+
+if [[ -z "$LAST_PERP" ]]; then
+  echo "❌ Could not find word_perplexity in logs!"
+  exit 1
+fi
+
+echo ">>> Last word_perplexity = $LAST_PERP"
+
+# Compare against threshold
+awk -v val="$LAST_PERP" -v thr="$THRESHOLD" 'BEGIN {exit (val > thr)}'
+if [[ $? -ne 0 ]]; then
+  echo "❌ Regression detected: word_perplexity ($LAST_PERP) > threshold ($THRESHOLD)"
+  exit 1
+fi
+
+echo "✅ Check passed: word_perplexity ($LAST_PERP) <= $THRESHOLD"

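The threshold gate relies on awk's exit status doubling as a boolean: exit (val > thr) returns 0 when the measured perplexity is at or below the threshold and 1 otherwise, so the shell's $? carries the pass/fail signal. A standalone sketch of the same pattern, with a hypothetical check_threshold helper:

    # Exit status encodes the comparison: 0 = pass (val <= thr), 1 = fail.
    check_threshold() {
      awk -v val="$1" -v thr="$2" 'BEGIN {exit (val > thr)}'
    }

    check_threshold 9.8 62.0  && echo "pass"        # 9.8 <= 62.0
    check_threshold 80.2 62.0 || echo "regression"  # 80.2 > 62.0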