pytorch
diff --git a/‎.ci/scripts/test_model.sh‎
Lines changed: 23 additions & 2 deletions b/‎.ci/scripts/test_model.sh‎
Lines changed: 23 additions & 2 deletions
diff --git a/‎.github/workflows/apple.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/apple.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/arm/test/test_arm_baremetal.sh‎
Lines changed: 20 additions & 6 deletions b/‎backends/arm/test/test_arm_baremetal.sh‎
Lines changed: 20 additions & 6 deletions
diff --git a/‎backends/arm/test/test_model.py‎
Lines changed: 26 additions & 4 deletions b/‎backends/arm/test/test_model.py‎
Lines changed: 26 additions & 4 deletions
diff --git a/‎backends/qualcomm/builders/op_slice_copy.py‎
Lines changed: 1 addition & 1 deletion b/‎backends/qualcomm/builders/op_slice_copy.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/qualcomm/quantizer/custom_annotation.py‎
Lines changed: 40 additions & 0 deletions b/‎backends/qualcomm/quantizer/custom_annotation.py‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎backends/qualcomm/tests/test_qnn_delegate.py‎
Lines changed: 16 additions & 16 deletions b/‎backends/qualcomm/tests/test_qnn_delegate.py‎
Lines changed: 16 additions & 16 deletions
diff --git a/‎backends/xnnpack/quantizer/xnnpack_quantizer_utils.py‎
Lines changed: 0 additions & 1 deletion b/‎backends/xnnpack/quantizer/xnnpack_quantizer_utils.py‎
Lines changed: 0 additions & 1 deletion
@@ -188,6 +188,22 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=edsr
     # Additional deps for edsr
     pip install piq
+  elif [[ "${MODEL_NAME}" == "cvt" ]]; then
+    EXPORT_SCRIPT=cvt
+  elif [[ "${MODEL_NAME}" == "dit" ]]; then
+    EXPORT_SCRIPT=dit
+  elif [[ "${MODEL_NAME}" == "efficientnet" ]]; then
+    EXPORT_SCRIPT=efficientnet
+  elif [[ "${MODEL_NAME}" == "focalnet" ]]; then
+    EXPORT_SCRIPT=focalnet
+  elif [[ "${MODEL_NAME}" == "mobilevit_v1" ]]; then
+    EXPORT_SCRIPT=mobilevit_v1
+  elif [[ "${MODEL_NAME}" == "mobilevit_v2" ]]; then
+    EXPORT_SCRIPT=mobilevit_v2
+  elif [[ "${MODEL_NAME}" == "pvt" ]]; then
+    EXPORT_SCRIPT=pvt
+  elif [[ "${MODEL_NAME}" == "swin" ]]; then
+    EXPORT_SCRIPT=swin_transformer
   elif [[ "${MODEL_NAME}" == "albert" ]]; then
     EXPORT_SCRIPT=albert
   elif [[ "${MODEL_NAME}" == "bert" ]]; then
@@ -196,6 +212,8 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=distilbert
   elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
     EXPORT_SCRIPT=eurobert
+  elif [[ "${MODEL_NAME}" == "roberta" ]]; then
+    EXPORT_SCRIPT=roberta
   else
     echo "Unsupported model $MODEL_NAME"
     exit 1
@@ -210,10 +228,13 @@ test_model_with_qnn() {
     "dl3"|"mv3"|"mv2"|"ic4"|"ic3"|"vit"|"mb"|"w2l")
         SCRIPT_FOLDER=scripts
         ;;
-    "albert"|"bert"|"distilbert")
+    "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
+        SCRIPT_FOLDER=oss_scripts
+        ;;
+    "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
         pip install evaluate
         SCRIPT_FOLDER=oss_scripts
-        # Bert models running in 16bit will encounter op validation fail on some operations,
+        # 16bit models will encounter op validation fail on some operations,
         # which requires CHIPSET >= SM8550.
         QNN_CHIPSET=SM8550
         ;;
 
@@ -39,7 +39,7 @@ jobs:
         id: set_version
         shell: bash
         run: |
-          VERSION="0.7.0.$(TZ='PST8PDT' date +%Y%m%d)"
+          VERSION="0.8.0.$(TZ='PST8PDT' date +%Y%m%d)"
           echo "version=$VERSION" >> "$GITHUB_OUTPUT"
 
   build-demo-ios:
 
@@ -470,7 +470,7 @@ jobs:
       docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
+      timeout: 90
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
@@ -489,14 +489,14 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [albert, bert, distilbert] # eurobert requires transfomer >= 4.48.0, skip for now
+        model: [cvt, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transfomer >= 4.48.0, skip for now
       fail-fast: false
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
+      timeout: 90
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 
@@ -73,22 +73,32 @@ all() { # Run all tests
 test_pytest_ops() { # Test ops and other things
     echo "${TEST_SUITE_NAME}: Run pytest"
 
+    # Make sure to not run this tests on FVP by removing the elf builds,
+    # as they are detected by the unit tests and used if they exists
+    rm -Rf arm_test/arm_semihosting_executor_runner_corstone-300
+    rm -Rf arm_test/arm_semihosting_executor_runner_corstone-320
+
     # Prepare for pytest
     backends/arm/scripts/build_executorch.sh
 
     # Run arm baremetal pytest tests without FVP
-    pytest  --verbose --color=yes --numprocesses=auto backends/arm/test/ --ignore=backends/arm/test/models
+    pytest  --verbose --color=yes --numprocesses=auto --durations=10 backends/arm/test/ --ignore=backends/arm/test/models
     echo "${TEST_SUITE_NAME}: PASS"
 }
 
 test_pytest_models() { # Test ops and other things
     echo "${TEST_SUITE_NAME}: Run pytest"
 
+    # Make sure to not run this tests on FVP by removing the elf builds,
+    # as they are detected by the unit tests and used if they exists
+    rm -Rf arm_test/arm_semihosting_executor_runner_corstone-300
+    rm -Rf arm_test/arm_semihosting_executor_runner_corstone-320
+
     # Prepare for pytest
     backends/arm/scripts/build_executorch.sh
 
     # Run arm baremetal pytest tests without FVP
-    pytest  --verbose --color=yes backends/arm/test/models
+    pytest  --verbose --color=yes --durations=0 backends/arm/test/models
     echo "${TEST_SUITE_NAME}: PASS"
 }
 
@@ -105,11 +115,13 @@ test_pytest_ops_ethosu_fvp() { # Same as test_pytest but also sometime verify us
     # Prepare Corstone-3x0 FVP for pytest
     backends/arm/scripts/build_executorch.sh
     backends/arm/scripts/build_portable_kernels.sh
-    # Build semihosting version of the runner used by pytest testing when
+    # Build semihosting version of the runner used by pytest testing. This builds:
+    # arm_test/arm_semihosting_executor_runner_corstone-300
+    # arm_test/arm_semihosting_executor_runner_corstone-320
     backends/arm/test/setup_testing.sh
 
     # Run arm baremetal pytest tests with FVP
-    pytest  --verbose --color=yes --numprocesses=auto backends/arm/test/ --ignore=backends/arm/test/models
+    pytest  --verbose --color=yes --numprocesses=auto --durations=10  backends/arm/test/ --ignore=backends/arm/test/models
     echo "${TEST_SUITE_NAME}: PASS"
 }
 
@@ -119,11 +131,13 @@ test_pytest_models_ethosu_fvp() { # Same as test_pytest but also sometime verify
     # Prepare Corstone-3x0 FVP for pytest
     backends/arm/scripts/build_executorch.sh
     backends/arm/scripts/build_portable_kernels.sh
-    # Build semihosting version of the runner used by pytest testing
+    # Build semihosting version of the runner used by pytest testing. This builds:
+    # arm_test/arm_semihosting_executor_runner_corstone-300
+    # arm_test/arm_semihosting_executor_runner_corstone-320
     backends/arm/test/setup_testing.sh
 
     # Run arm baremetal pytest tests with FVP
-    pytest  --verbose --color=yes backends/arm/test/models
+    pytest  --verbose --color=yes --durations=0 backends/arm/test/models
     echo "${TEST_SUITE_NAME}: PASS"
 }
 
 
@@ -7,6 +7,7 @@
 import os
 import subprocess
 import sys
+import time
 
 
 def get_args():
@@ -199,12 +200,17 @@ def run_elf_with_fvp(script_path: str, elf_file: str, target: str, timeout: int)
 
 
 if __name__ == "__main__":
-
+    total_start_time = time.perf_counter()
     args = get_args()
     script_path = os.path.join("backends", "arm", "scripts")
 
     if args.build_libs:
+        start_time = time.perf_counter()
         build_libs(args.test_output, script_path)
+        end_time = time.perf_counter()
+        print(
+            f"[Test model: {end_time - start_time:.2f} s] Build needed executorch libs"
+        )
 
     if args.model:
         model_name = args.model.split(" ")[0].split(";")[0]
@@ -217,6 +223,7 @@ def run_elf_with_fvp(script_path: str, elf_file: str, target: str, timeout: int)
             args.test_output, f"{model_name}_arm_delegate_{args.target}"
         )
 
+        start_time = time.perf_counter()
         pte_file = build_pte(
             args.test_output,
             model_name,
@@ -226,13 +233,17 @@ def run_elf_with_fvp(script_path: str, elf_file: str, target: str, timeout: int)
             output,
             args.no_intermediate,
         )
-        print(f"PTE file created: {pte_file} ")
+        end_time = time.perf_counter()
+        print(
+            f"[Test model: {end_time - start_time:.2f} s] PTE file created: {pte_file}"
+        )
 
         if "ethos-u" in args.target:
             elf_build_path = os.path.join(
                 output, f"{model_name}_arm_delegate_{args.target}"
             )
 
+            start_time = time.perf_counter()
             elf_file = build_ethosu_runtime(
                 args.test_output,
                 script_path,
@@ -243,7 +254,18 @@ def run_elf_with_fvp(script_path: str, elf_file: str, target: str, timeout: int)
                 args.extra_flags,
                 elf_build_path,
             )
-            print(f"ELF file created: {elf_file} ")
+            end_time = time.perf_counter()
+            print(
+                f"[Test model: {end_time - start_time:.2f} s] ELF file created: {elf_file}"
+            )
 
+            start_time = time.perf_counter()
             run_elf_with_fvp(script_path, elf_file, args.target, args.timeout)
-        print(f"Model: {model_name} on {args.target} -> PASS")
+            end_time = time.perf_counter()
+            print(
+                f"[Test model: {end_time - start_time:.2f} s] Tested elf on FVP {elf_file}"
+            )
+        total_end_time = time.perf_counter()
+        print(
+            f"[Test model: {total_end_time - total_start_time:.2f} s total] Model: {model_name} on {args.target} -> PASS"
+        )
@@ -56,7 +56,7 @@ def define_node(
         if start < 0:
             start = start % input_tensor.shape[dim]
 
-        if len(node.args) > 3:
+        if len(node.args) > 3 and node.args[3] is not None:
             end = min(cast(int, node.args[3]), input_tensor.shape[dim])
             if end < 0:
                 end = end % input_tensor.shape[dim]
 
@@ -227,6 +227,40 @@ def annotate_single_in_single_out(
             _annotated=True,
         )
 
+    def annotate_single_in_share_out(
+        node: Node, quantization_config: QuantizationConfig
+    ) -> None:
+
+        input_qspec_map = {}
+        input_act = node.args[0]
+        input_qspec_map[input_act] = quantization_config.input_activation
+
+        node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
+            input_qspec_map=input_qspec_map,
+            output_qspec=SharedQuantizationSpec((input_act, node)),
+            _annotated=True,
+        )
+
+    def annotate_stack(node: Node, quantization_config: QuantizationConfig) -> None:
+        input_nodes = node.args[0]
+
+        first_input_node = input_nodes[0]
+        input_qspec_map = {}
+        input_qspec_map[first_input_node] = quantization_config.input_activation
+        share_qparams_with_input_act0_qspec = SharedQuantizationSpec(
+            (first_input_node, node)
+        )
+
+        for input_node in input_nodes[1:]:
+            if input_node not in input_qspec_map:
+                input_qspec_map[input_node] = share_qparams_with_input_act0_qspec
+
+        node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
+            input_qspec_map=input_qspec_map,
+            output_qspec=share_qparams_with_input_act0_qspec,
+            _annotated=True,
+        )
+
     def annotate_matmul_input1(node: Node):
         quantization_config_8a8w = get_8a8w_qnn_ptq_config(
             act_symmetric=True, act_observer=MinMaxObserver
@@ -247,6 +281,12 @@ def annotate_matmul_input1(node: Node):
             ]:
                 annotate_single_in_single_out(node, quantization_config_8a8w)
                 node = node.args[0]
+            elif node.target == torch.ops.aten.stack.default:
+                annotate_stack(node, quantization_config_8a8w)
+                node = node.args[0]
+            elif node.target == torch.ops.aten.flatten.using_ints:
+                annotate_single_in_share_out(node, quantization_config_8a8w)
+                node = node.args[0]
             elif node.target == torch.ops.aten.cat.default:
                 annotate_cat(node, quantization_config_8a8w)
                 # For v, we tag 8a until conv op.
 
@@ -4585,14 +4585,13 @@ def test_gMLP(self):
                 self.assertGreaterEqual(msg["top_1"], 60)
                 self.assertGreaterEqual(msg["top_5"], 85)
 
-    @unittest.skip("Only outputs good accuracy in QNN 2.29")
-    def test_mobilevit_v2(self):
+    def test_mobilevit_v1(self):
         if not self.required_envs([self.image_dataset]):
             self.skipTest("missing required envs")
 
         cmds = [
             "python",
-            f"{self.executorch_root}/examples/qualcomm/oss_scripts/mobilevit_v2.py",
+            f"{self.executorch_root}/examples/qualcomm/oss_scripts/mobilevit_v1.py"
             "--dataset",
             self.image_dataset,
             "--artifact",
@@ -4610,8 +4609,6 @@ def test_mobilevit_v2(self):
         ]
         if self.host:
             cmds.extend(["--host", self.host])
-        if self.shared_buffer:
-            cmds.extend(["--shared_buffer"])
 
         p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
         with Listener((self.ip, self.port)) as listener:
@@ -4621,17 +4618,22 @@ def test_mobilevit_v2(self):
             if "Error" in msg:
                 self.fail(msg["Error"])
             else:
-                self.assertGreaterEqual(msg["top_1"], 50)
+                self.assertGreaterEqual(msg["top_1"], 70)
                 self.assertGreaterEqual(msg["top_5"], 85)
 
-    def test_pvt(self):
+    @unittest.skip("Only outputs good accuracy in QNN 2.29")
+    def test_mobilevit_v2(self):
         if not self.required_envs([self.image_dataset]):
             self.skipTest("missing required envs")
 
         cmds = [
             "python",
-            f"{self.executorch_root}/examples/qualcomm/oss_scripts/pvt.py",
+            f"{self.executorch_root}/examples/qualcomm/oss_scripts/mobilevit_v2.py",
+            "--dataset",
             self.image_dataset,
+            "--artifact",
+            self.artifact_dir,
+            "--build_folder",
             self.build_folder,
             "--device",
             self.device,
@@ -4644,6 +4646,8 @@ def test_pvt(self):
         ]
         if self.host:
             cmds.extend(["--host", self.host])
+        if self.shared_buffer:
+            cmds.extend(["--shared_buffer"])
 
         p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
         with Listener((self.ip, self.port)) as listener:
@@ -4653,21 +4657,17 @@ def test_pvt(self):
             if "Error" in msg:
                 self.fail(msg["Error"])
             else:
-                self.assertGreaterEqual(msg["top_1"], 65)
+                self.assertGreaterEqual(msg["top_1"], 50)
                 self.assertGreaterEqual(msg["top_5"], 85)
 
-    def test_mobilevit1(self):
+    def test_pvt(self):
         if not self.required_envs([self.image_dataset]):
             self.skipTest("missing required envs")
 
         cmds = [
             "python",
-            f"{self.executorch_root}/examples/qualcomm/oss_scripts/mobilevit1.py"
-            "--dataset",
+            f"{self.executorch_root}/examples/qualcomm/oss_scripts/pvt.py",
             self.image_dataset,
-            "--artifact",
-            self.artifact_dir,
-            "--build_folder",
             self.build_folder,
             "--device",
             self.device,
@@ -4689,7 +4689,7 @@ def test_mobilevit1(self):
             if "Error" in msg:
                 self.fail(msg["Error"])
             else:
-                self.assertGreaterEqual(msg["top_1"], 70)
+                self.assertGreaterEqual(msg["top_1"], 65)
                 self.assertGreaterEqual(msg["top_5"], 85)
 
     def test_regnet(self):
 
@@ -271,7 +271,6 @@ def _do_annotate_conv(
 
         # skip if transposed conv has more than 1 group
         skip = skip or (is_conv_transpose and num_groups != 1)
-        print(f"{skip} conv transpose and num_groups")
 
         if is_conv_transpose:
             # transposed convs per output channel quantization