pytorch
diff --git a/.ci/scripts/test_huggingface_optimum_model.py b/.ci/scripts/test_huggingface_optimum_model.py
Lines changed: 9 additions & 3 deletions
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
Lines changed: 4 additions & 1 deletion
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
Lines changed: 1 addition & 1 deletion
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
Lines changed: 59 additions & 0 deletions
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
Lines changed: 3 additions & 6 deletions
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
Lines changed: 2 additions & 0 deletions
@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
 
     assert torch.allclose(
         eager_output.logits, et_output, atol=1e-02, rtol=1e-02
-    ), "CoreML output does not match eager"
+    ), "Model output does not match eager"
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--recipe", type=str, required=True)
     parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        required=False,
+        help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+    )
     args = parser.parse_args()
 
     model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
             f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
         )
 
+    model_id, test_fn = model_to_model_id_and_test_function[args.model]
     with tempfile.TemporaryDirectory() as tmp_dir:
-        model_id, test_fn = model_to_model_id_and_test_function[args.model]
         test_fn(
             model_id=model_id,
-            model_dir=tmp_dir,
+            model_dir=tmp_dir if args.model_dir is None else args.model_dir,
             recipe=args.recipe,
             quantize=args.quantize,
         )
@@ -199,6 +199,9 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=albert
   elif [[ "${MODEL_NAME}" == "bert" ]]; then
     EXPORT_SCRIPT=bert
+  elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
+    EXPORT_SCRIPT=conv_former
+    EXTRA_FLAGS="--dataset imagenet-mini/val"
   elif [[ "${MODEL_NAME}" == "cvt" ]]; then
     EXPORT_SCRIPT=cvt
   elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
@@ -238,7 +241,7 @@ test_model_with_qnn() {
     "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
         SCRIPT_FOLDER=oss_scripts
         ;;
-    "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
+    "albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
         pip install evaluate
         SCRIPT_FOLDER=oss_scripts
         # 16bit models will encounter op validation fail on some operations,
 
@@ -568,7 +568,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
       fail-fast: false
     with:
       runner: linux.2xlarge
 
@@ -4588,6 +4588,65 @@ def test_static_qwen2_5(self):
                         msg["inference_speed"], inference_speed_ref[self.model]
                     )
 
+    def test_qwen3(self):  # Run llama.py with the qwen3_0.6b decoder and check the reply it reports.
+        if not self.required_envs():
+            self.skipTest("missing required envs")
+
+        prompt = "My favourite condiment is "
+        cmds = [
+            "python",
+            f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
+            "--artifact",
+            self.artifact_dir,
+            "--build_folder",
+            self.build_folder,
+            "--model",
+            self.model,
+            "--ip",
+            self.ip,
+            "--port",
+            str(self.port),
+            "--prompt",
+            f"{prompt}",
+            "--ptq",
+            "16a8w",
+            "--decoder_model",
+            "qwen3_0.6b",
+            "--model_mode",
+            "hybrid",
+            "--prefill_ar_len",
+            "32",
+            "--max_seq_len",
+            "128",
+        ]
+        if self.compile_only:
+            cmds.extend(["--compile_only"])
+        elif self.device:
+            cmds.extend(["--device", self.device])
+        if self.host:
+            cmds.extend(["--host", self.host])
+        elif self.enable_x86_64:
+            cmds.extend(["--enable_x86_64"])
+        if self.pre_gen_pte:
+            cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
+
+        # Accuracy is currently poor, so only check that the output echoes the user's prompt.
+        golden_start_with = "My favourite condiment is "  # same text as `prompt` above
+        p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)  # child stdout is discarded
+        with Listener((self.ip, self.port)) as listener:
+            conn = listener.accept()  # presumably llama.py connects back via --ip/--port; confirm
+            p.communicate()  # wait for the script to exit before reading its message
+            msg = json.loads(conn.recv())
+            if "Error" in msg:
+                self.fail(msg["Error"])
+            else:
+                model_out = msg["result"][0]
+                self.assertTrue(
+                    model_out.startswith(golden_start_with),
+                    f"Expected Output: {golden_start_with}. Actual Output: {model_out}",
+                )
+                self.assertGreaterEqual(msg["inference_speed"], 70)  # NOTE(review): presumably the minimum speed expected on Lanai; confirm
+
 
 class TestExampleOssScript(TestQNN):
     def test_albert(self):
 
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 import collections
-import copy
 import os
 import subprocess
 import tempfile
@@ -30,7 +29,7 @@
     get_soc_to_chipset_map,
     to_edge_transform_and_lower_to_qnn,
 )
-from executorch.devtools import generate_etrecord, Inspector
+from executorch.devtools import Inspector
 from executorch.devtools.inspector._inspector_utils import TimeScale
 from executorch.examples.qualcomm.utils import (
     generate_inputs,
@@ -512,11 +511,9 @@ def lower_module_and_test_output(
             skip_node_id_set=skip_node_id_set,
             skip_node_op_set=skip_node_op_set,
             skip_mutable_buffer=skip_mutable_buffer,
+            generate_etrecord=self.enable_profile,
         )
 
-        # this is needed for the ETRecord as lowering modifies the graph in-place
-        edge_copy = copy.deepcopy(delegated_program)
-
         exec_prog = delegated_program.to_executorch(
             exir.ExecutorchBackendConfig(
                 # For shared buffer, user must pass the memory address
@@ -543,7 +540,7 @@ def lower_module_and_test_output(
 
         etrecord_path = "etrecord.bin"
         if self.enable_profile:
-            generate_etrecord(etrecord_path, edge_copy, exec_prog)
+            exec_prog.get_etrecord().save(etrecord_path)
         # Check numerics
         if (
             assert_output_equal
 
@@ -334,6 +334,7 @@ def to_edge_transform_and_lower_to_qnn(
     skip_node_id_set: Optional[set] = None,
     skip_node_op_set: Optional[set] = None,
     skip_mutable_buffer: bool = False,
+    generate_etrecord: bool = False,
 ) -> EdgeProgramManager:
     """
     Transforms and lowers a given PyTorch module to the QNN backend.
@@ -442,6 +443,7 @@ def ensure_graph_specific_dict(value, graph_names):
         partitioner=qnn_partitioners,
         constant_methods=constant_methods,
         compile_config=qnn_edge_config(),
+        generate_etrecord=generate_etrecord,
     )