Skip to content

Commit 72fcff3

Browse files
authored
Merge branch 'main' into add-dim-order-clone-kernel
2 parents e1865bf + 72ef7b1 commit 72fcff3

File tree

17 files changed

+357
-269
lines changed

17 files changed

+357
-269
lines changed

.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
262262

263263
assert torch.allclose(
264264
eager_output.logits, et_output, atol=1e-02, rtol=1e-02
265-
), "CoreML output does not match eager"
265+
), "Model output does not match eager"
266266

267267

268268
if __name__ == "__main__":
269269
parser = argparse.ArgumentParser()
270270
parser.add_argument("--model", type=str, required=True)
271271
parser.add_argument("--recipe", type=str, required=True)
272272
parser.add_argument("--quantize", action="store_true", help="Enable quantization")
273+
parser.add_argument(
274+
"--model_dir",
275+
type=str,
276+
required=False,
277+
help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
278+
)
273279
args = parser.parse_args()
274280

275281
model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
294300
f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
295301
)
296302

303+
model_id, test_fn = model_to_model_id_and_test_function[args.model]
297304
with tempfile.TemporaryDirectory() as tmp_dir:
298-
model_id, test_fn = model_to_model_id_and_test_function[args.model]
299305
test_fn(
300306
model_id=model_id,
301-
model_dir=tmp_dir,
307+
model_dir=tmp_dir if args.model_dir is None else args.model_dir,
302308
recipe=args.recipe,
303309
quantize=args.quantize,
304310
)

.ci/scripts/test_model.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ test_model_with_qnn() {
199199
EXPORT_SCRIPT=albert
200200
elif [[ "${MODEL_NAME}" == "bert" ]]; then
201201
EXPORT_SCRIPT=bert
202+
elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
203+
EXPORT_SCRIPT=conv_former
204+
EXTRA_FLAGS="--dataset imagenet-mini/val"
202205
elif [[ "${MODEL_NAME}" == "cvt" ]]; then
203206
EXPORT_SCRIPT=cvt
204207
elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
@@ -238,7 +241,7 @@ test_model_with_qnn() {
238241
"cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
239242
SCRIPT_FOLDER=oss_scripts
240243
;;
241-
"albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
244+
"albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
242245
pip install evaluate
243246
SCRIPT_FOLDER=oss_scripts
244247
# 16bit models will encounter op validation fail on some operations,

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ jobs:
568568
strategy:
569569
matrix:
570570
dtype: [fp32]
571-
model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
571+
model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
572572
fail-fast: false
573573
with:
574574
runner: linux.2xlarge

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4588,6 +4588,65 @@ def test_static_qwen2_5(self):
45884588
msg["inference_speed"], inference_speed_ref[self.model]
45894589
)
45904590

4591+
def test_qwen3(self):
4592+
if not self.required_envs():
4593+
self.skipTest("missing required envs")
4594+
4595+
prompt = "My favourite condiment is "
4596+
cmds = [
4597+
"python",
4598+
f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
4599+
"--artifact",
4600+
self.artifact_dir,
4601+
"--build_folder",
4602+
self.build_folder,
4603+
"--model",
4604+
self.model,
4605+
"--ip",
4606+
self.ip,
4607+
"--port",
4608+
str(self.port),
4609+
"--prompt",
4610+
f"{prompt}",
4611+
"--ptq",
4612+
"16a8w",
4613+
"--decoder_model",
4614+
"qwen3_0.6b",
4615+
"--model_mode",
4616+
"hybrid",
4617+
"--prefill_ar_len",
4618+
"32",
4619+
"--max_seq_len",
4620+
"128",
4621+
]
4622+
if self.compile_only:
4623+
cmds.extend(["--compile_only"])
4624+
elif self.device:
4625+
cmds.extend(["--device", self.device])
4626+
if self.host:
4627+
cmds.extend(["--host", self.host])
4628+
elif self.enable_x86_64:
4629+
cmds.extend(["--enable_x86_64"])
4630+
if self.pre_gen_pte:
4631+
cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
4632+
4633+
# Accuracy is bad for now. Just check user's prompt is returned.
4634+
golden_start_with = "My favourite condiment is "
4635+
p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
4636+
with Listener((self.ip, self.port)) as listener:
4637+
conn = listener.accept()
4638+
p.communicate()
4639+
msg = json.loads(conn.recv())
4640+
if "Error" in msg:
4641+
self.fail(msg["Error"])
4642+
else:
4643+
model_out = msg["result"][0]
4644+
self.assertTrue(
4645+
model_out.startswith(golden_start_with),
4646+
f"Expected Output: {golden_start_with}. Actual Output: {model_out}",
4647+
)
4648+
self.assertGreaterEqual(msg["inference_speed"], 70) # Lanai
4649+
45914650

45924651
class TestExampleOssScript(TestQNN):
45934652
def test_albert(self):

backends/qualcomm/tests/utils.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66
import collections
7-
import copy
87
import os
98
import subprocess
109
import tempfile
@@ -30,7 +29,7 @@
3029
get_soc_to_chipset_map,
3130
to_edge_transform_and_lower_to_qnn,
3231
)
33-
from executorch.devtools import generate_etrecord, Inspector
32+
from executorch.devtools import Inspector
3433
from executorch.devtools.inspector._inspector_utils import TimeScale
3534
from executorch.examples.qualcomm.utils import (
3635
generate_inputs,
@@ -512,11 +511,9 @@ def lower_module_and_test_output(
512511
skip_node_id_set=skip_node_id_set,
513512
skip_node_op_set=skip_node_op_set,
514513
skip_mutable_buffer=skip_mutable_buffer,
514+
generate_etrecord=self.enable_profile,
515515
)
516516

517-
# this is needed for the ETRecord as lowering modifies the graph in-place
518-
edge_copy = copy.deepcopy(delegated_program)
519-
520517
exec_prog = delegated_program.to_executorch(
521518
exir.ExecutorchBackendConfig(
522519
# For shared buffer, user must pass the memory address
@@ -543,7 +540,7 @@ def lower_module_and_test_output(
543540

544541
etrecord_path = "etrecord.bin"
545542
if self.enable_profile:
546-
generate_etrecord(etrecord_path, edge_copy, exec_prog)
543+
exec_prog.get_etrecord().save(etrecord_path)
547544
# Check numerics
548545
if (
549546
assert_output_equal

backends/qualcomm/utils/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ def to_edge_transform_and_lower_to_qnn(
334334
skip_node_id_set: Optional[set] = None,
335335
skip_node_op_set: Optional[set] = None,
336336
skip_mutable_buffer: bool = False,
337+
generate_etrecord: bool = False,
337338
) -> EdgeProgramManager:
338339
"""
339340
Transforms and lowers a given PyTorch module to the QNN backend.
@@ -442,6 +443,7 @@ def ensure_graph_specific_dict(value, graph_names):
442443
partitioner=qnn_partitioners,
443444
constant_methods=constant_methods,
444445
compile_config=qnn_edge_config(),
446+
generate_etrecord=generate_etrecord,
445447
)
446448

447449

0 commit comments

Comments
 (0)