     from_context_binary,
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
+    is_qnn_sdk_version_less_than,
     PyQnnManagerAdaptor,
     rewrite_prepared_observer,
     skip_annotation,
     to_edge_transform_and_lower_to_qnn,
     update_spill_fill_size,
 )
 
-from executorch.examples.models.llama.llama_transformer import MOEFeedForward
-
-from executorch.examples.models.llama.model_args import ModelArgs
-
 from executorch.examples.qualcomm.utils import (
     make_quantizer,
     setup_common_args_and_variables,
@@ -136,6 +133,13 @@ def test_qnn_backend_amax(self):
             with self.subTest(i=i):
                 self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_amin(self):
+        modules = [AMin(dim=1, keepdim=False), AMin(dim=1, keepdim=True)]  # noqa: F405
+        sample_input = (torch.randn(4, 4),)
+        for i, module in enumerate(modules):
+            with self.subTest(i=i):
+                self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_any(self):
         modules = [Any(), Any(dim=[0, 1]), Any(dim=1, keepdim=True)]  # noqa: F405
         sample_input = (torch.randn(3, 3, 3) > 0,)
@@ -1227,6 +1231,9 @@ def test_qnn_backend_lift_add_tensor(self):
 
     @unittest.skip("Fail because of bad accuracy")
     def test_qnn_backend_moe_feed_forward(self):
+        from executorch.examples.models.llama.llama_transformer import MOEFeedForward
+        from executorch.examples.models.llama.model_args import ModelArgs
+
         args = ModelArgs()
         args.dim = 32
         args.n_heads = 8
@@ -1421,6 +1428,14 @@ def test_qnn_backend_amax(self):
                 module = self.get_qdq_module(module, sample_input)
                 self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_amin(self):
+        modules = [AMin(dim=1, keepdim=False), AMin(dim=1, keepdim=True)]  # noqa: F405
+        sample_input = (torch.randn(4, 4),)
+        for i, module in enumerate(modules):
+            with self.subTest(i=i):
+                module = self.get_qdq_module(module, sample_input)
+                self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_any(self):
         modules = [Any(), Any(dim=[0, 1]), Any(dim=1, keepdim=True)]  # noqa: F405
         sample_input = (torch.randn(3, 3, 3) > 0,)
@@ -2643,8 +2658,57 @@ def test_qnn_backend_einsum_outer_product_relu(self):
         module = self.get_qdq_module(module, sample_input)
         self.lower_module_and_test_output(module, sample_input)
 
+    @unittest.skipIf(is_qnn_sdk_version_less_than("2.35"), "UT passes only after QNN 2.35")
+    def test_qnn_backend_masked_softmax(self):
+        if self.enable_x86_64:
+            self.skipTest(
+                "At the moment, this test only runs on device."
+            )
+        module = MaskedSoftmax()  # noqa: F405
+        kv_arange = torch.arange(128)
+        reshaped_cache_position = torch.tensor([[0]])
+
+        # Simplest and most efficient way to obtain a causal mask
+        causal_mask = kv_arange <= reshaped_cache_position
+        atten_mask = torch.full((1, 128), torch.tensor(-65535.0))
+        atten_mask = atten_mask.masked_fill(causal_mask, 0)
+        atten_mask = atten_mask[None, None, :, :].expand(1, -1, -1, -1)
+        sample_input = (atten_mask, torch.randn([1, 1, 1, 128]))
+        # Masked softmax is only supported in quantized models
+        module = self.get_qdq_module(
+            module, sample_input, quant_dtype=QuantDtype.use_16a8w
+        )
+        backend_options = generate_htp_compiler_spec(use_fp16=False)
+        compiler_spec = generate_qnn_executorch_compiler_spec(
+            soc_model=self.chipset_table[TestQNN.model],
+            backend_options=backend_options,
+            optrace=True,
+        )
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
+                module, sample_input, compiler_spec
+            ).to_executorch()
+            pte_path = f"{tmp_dir}/model.pte"
+            with open(pte_path, "wb") as f:
+                edge_prog_mgr.write_to_file(f)
+            adb = self.get_adb_tool(pte_path)
+            binaries_trace = generate_optrace(
+                tmp_dir, self.chipset_table[self.model], adb, pte_path, sample_input
+            )
+            has_masked_softmax = False
+            for _, (_, qhas) in binaries_trace.items():
+                with open(qhas, "r") as qhas_file:
+                    qhas_data = json.load(qhas_file)
+                    for row in qhas_data["data"]["htp_op_types"]["data"]:
+                        if "MaskedSoftmax" in row["op"]:
+                            has_masked_softmax = True
+            self.assertTrue(has_masked_softmax)
+
     @unittest.skip("UT pass before QNN 2.26, segfault during partitioner")
     def test_qnn_backend_moe_feed_forward(self):
+        from executorch.examples.models.llama.llama_transformer import MOEFeedForward
+        from executorch.examples.models.llama.model_args import ModelArgs
+
         args = ModelArgs()
         args.dim = 32
         args.n_heads = 8