diff --git a/backends/arm/README.md b/backends/arm/README.md index 375259c62ab..6f4642f8d44 100644 --- a/backends/arm/README.md +++ b/backends/arm/README.md @@ -9,7 +9,7 @@ The expected flow is: * torch.nn.module -> TOSA -> command_stream for fully AoT flows e.g. embedded. * torch.nn.module -> TOSA for flows supporting a JiT compilation step. -Current backend support is being developed for TOSA to Ethos(TM)-U55/65 via the +Current backend support is being developed for TOSA to Ethos(TM)-U55/65/85 via the ethos-u-vela compilation stack. which follows the fully AoT flow. ## Layout diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py index 27fd36ca0e1..db8bad166cc 100644 --- a/backends/arm/arm_backend.py +++ b/backends/arm/arm_backend.py @@ -54,7 +54,7 @@ def ethosu_compile_spec( memory_mode: Optional[str] = None, extra_flags: Optional[str] = None, config_ini: Optional[str] = "Arm/vela.ini", - ): + ) -> "ArmCompileSpecBuilder": """ Generate compile spec for Ethos-U NPU @@ -84,7 +84,7 @@ def ethosu_compile_spec( return self - def tosa_compile_spec(self): + def tosa_compile_spec(self) -> "ArmCompileSpecBuilder": """ Generate compile spec for TOSA flatbuffer output """ @@ -94,14 +94,18 @@ def tosa_compile_spec(self): self.output_format = "tosa" return self - def dump_intermediate_artifacts_to(self, output_path: str): + def dump_intermediate_artifacts_to( + self, output_path: str + ) -> "ArmCompileSpecBuilder": """ Sets a path for dumping intermediate results during such as tosa and pte. """ self.path_for_intermediates = output_path return self - def set_permute_memory_format(self, set_nhwc_permutation: bool = True): + def set_permute_memory_format( + self, set_nhwc_permutation: bool = True + ) -> "ArmCompileSpecBuilder": """ Permute to channel last in compiler and runtime. Compilation and runtime will convert rank 4 inputs to channel last for each sub-graph. @@ -109,7 +113,7 @@ def set_permute_memory_format(self, set_nhwc_permutation: bool = True): self.permute_nhwc = set_nhwc_permutation return self - def set_quantize_io(self, quantize_io: bool = False): + def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder": """ Quantization of inputs and dequantization of outputs for cases where whole graph is quantized and method signature is not of quantized type. @@ -117,7 +121,7 @@ def set_quantize_io(self, quantize_io: bool = False): self.quantize_io = quantize_io return self - def build(self): + def build(self) -> List[CompileSpec]: """ Generate a list of compile spec objects from the builder """ diff --git a/backends/arm/arm_vela.py b/backends/arm/arm_vela.py index 53533947c49..614ecdcdd9e 100644 --- a/backends/arm/arm_vela.py +++ b/backends/arm/arm_vela.py @@ -48,7 +48,12 @@ def vela_compile(tosa_graph, args: List[str]): args.append(tosa_path) vela.main(" ".join(args).split(" ")) - np_path = os.path.join(output_dir, "out_sg0_vela.npz") + if any("ethos-u85" in arg for arg in args) or any( + "debug-force-regor" in arg for arg in args + ): + np_path = os.path.join(tmpdir, "output", "out_vela.npz") + else: + np_path = os.path.join(tmpdir, "output", "out_sg0_vela.npz") blocks = b"" with np.load(np_path, allow_pickle=False) as data: diff --git a/backends/arm/runtime/ArmBackendEthosU.cpp b/backends/arm/runtime/ArmBackendEthosU.cpp index 26ffb0b9700..6d9ab6b0091 100644 --- a/backends/arm/runtime/ArmBackendEthosU.cpp +++ b/backends/arm/runtime/ArmBackendEthosU.cpp @@ -281,18 +281,27 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface { } } if (!permuted_shape) { - // Error check matching shapes in the general case + // Check the number of elements in each tensor match + int tensor_count = 1; + int io_count = 1; + for (int i = 0; i < tensor.dim(); i++) { - if (tensor.size(i) != io->shape[i]) { - ET_LOG(Error, "Tensor input/output %d mismatched shape", index); - ET_LOG( - Error, - "dimension %d mismatch, %zd != %d", - index, - tensor.size(i), - io->shape[i]); - return Error::InvalidProgram; - } + tensor_count = tensor_count * tensor.size(i); + } + + // The VelaIO type has a shape of fixed size 4 + for (int i = 0; i < 4; i++) { + io_count = io_count * io->shape[i]; + } + + if (tensor_count != io_count) { + ET_LOG(Error, "Input tensor sizes do not match"); + ET_LOG( + Error, + "Program expects %d elements but got %d", + io_count, + tensor_count); + return Error::InvalidProgram; } } *is_permuted = permuted_shape; diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py index f85fd1f2dac..0d50f1882da 100644 --- a/backends/arm/test/common.py +++ b/backends/arm/test/common.py @@ -14,6 +14,7 @@ import torch from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder +from executorch.exir.backend.compile_spec_schema import CompileSpec _enabled_options: list[str] = [] @@ -85,7 +86,9 @@ def is_option_enabled(option: str, fail_if_not_enabled: bool = False) -> bool: return False -def get_tosa_compile_spec(permute_memory_to_nhwc=True, custom_path=None): +def get_tosa_compile_spec( + permute_memory_to_nhwc=True, custom_path=None +) -> list[CompileSpec]: """ Default compile spec for TOSA tests. """ @@ -112,8 +115,8 @@ def get_tosa_compile_spec_unbuilt( def get_u55_compile_spec( - permute_memory_to_nhwc=False, quantize_io=False, custom_path=None -): + permute_memory_to_nhwc=True, quantize_io=False, custom_path=None +) -> list[CompileSpec]: """ Default compile spec for Ethos-U55 tests. """ @@ -122,10 +125,21 @@ def get_u55_compile_spec( ).build() +def get_u85_compile_spec( + permute_memory_to_nhwc=True, quantize_io=False, custom_path=None +) -> list[CompileSpec]: + """ + Default compile spec for Ethos-U85 tests. + """ + return get_u85_compile_spec_unbuilt( + permute_memory_to_nhwc, quantize_io=quantize_io, custom_path=custom_path + ).build() + + def get_u55_compile_spec_unbuilt( - permute_memory_to_nhwc=False, quantize_io=False, custom_path=None + permute_memory_to_nhwc=True, quantize_io=False, custom_path=None ) -> ArmCompileSpecBuilder: - """Get the ArmCompileSpecBuilder for the default TOSA tests, to modify + """Get the ArmCompileSpecBuilder for the Ethos-U55 tests, to modify the compile spec before calling .build() to finalize it. """ artifact_path = custom_path or tempfile.mkdtemp(prefix="arm_u55_") @@ -137,7 +151,29 @@ def get_u55_compile_spec_unbuilt( "ethos-u55-128", system_config="Ethos_U55_High_End_Embedded", memory_mode="Shared_Sram", - extra_flags=None, + extra_flags="--debug-force-regor --output-format=raw", + ) + .set_quantize_io(is_option_enabled("quantize_io") or quantize_io) + .set_permute_memory_format(permute_memory_to_nhwc) + .dump_intermediate_artifacts_to(artifact_path) + ) + return compile_spec + + +def get_u85_compile_spec_unbuilt( + permute_memory_to_nhwc=True, quantize_io=False, custom_path=None +) -> list[CompileSpec]: + """Get the ArmCompileSpecBuilder for the Ethos-U85 tests, to modify + the compile spec before calling .build() to finalize it. + """ + artifact_path = custom_path or tempfile.mkdtemp(prefix="arm_u85_") + compile_spec = ( + ArmCompileSpecBuilder() + .ethosu_compile_spec( + "ethos-u85-128", + system_config="Ethos_U85_SYS_DRAM_Mid", + memory_mode="Shared_Sram", + extra_flags="--output-format=raw", ) .set_quantize_io(is_option_enabled("quantize_io") or quantize_io) .set_permute_memory_format(permute_memory_to_nhwc) diff --git a/backends/arm/test/ops/test_cat.py b/backends/arm/test/ops/test_cat.py index f677aa5590c..a40ae43b673 100644 --- a/backends/arm/test/ops/test_cat.py +++ b/backends/arm/test/ops/test_cat.py @@ -125,7 +125,9 @@ def test_cat_tosa_BI(self, operands: tuple[torch.Tensor, ...], dim: int): test_data = (operands, dim) self._test_cat_tosa_BI_pipeline(self.Cat(), test_data) + # TODO: Remove @unittest.expectedFailure when this issue is fixed in Regor @parameterized.expand(Cat.test_parameters) + @unittest.expectedFailure def test_cat_u55_BI(self, operands: tuple[torch.Tensor, ...], dim: int): test_data = (operands, dim) self._test_cat_u55_BI_pipeline(self.Cat(), test_data) diff --git a/backends/arm/test/ops/test_exp.py b/backends/arm/test/ops/test_exp.py index 79020ade25c..4f4935d482c 100644 --- a/backends/arm/test/ops/test_exp.py +++ b/backends/arm/test/ops/test_exp.py @@ -103,8 +103,6 @@ def test_exp_tosa_MI( def test_exp_tosa_BI(self, test_name: str, test_data: torch.Tensor): self._test_exp_tosa_BI_pipeline(self.Exp(), (test_data,)) - # Fails due to Vela diff from Tosa spec, expected to work with Regor. @parameterized.expand(test_data_suite) - @unittest.expectedFailure def test_exp_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): self._test_exp_tosa_u55_BI_pipeline(self.Exp(), (test_data,)) diff --git a/backends/arm/test/ops/test_log.py b/backends/arm/test/ops/test_log.py index 80bc17c987f..90066b3a63b 100644 --- a/backends/arm/test/ops/test_log.py +++ b/backends/arm/test/ops/test_log.py @@ -103,8 +103,6 @@ def test_log_tosa_MI( def test_log_tosa_BI(self, test_name: str, test_data: torch.Tensor): self._test_log_tosa_BI_pipeline(self.Log(), (test_data,)) - # Fails due to Vela diff from Tosa spec, logected to work with Regor. @parameterized.expand(test_data_suite) - @unittest.expectedFailure def test_log_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): self._test_log_tosa_u55_BI_pipeline(self.Log(), (test_data,)) diff --git a/backends/arm/test/ops/test_mul.py b/backends/arm/test/ops/test_mul.py index dee8b62f1b2..2aac3f22f13 100644 --- a/backends/arm/test/ops/test_mul.py +++ b/backends/arm/test/ops/test_mul.py @@ -141,9 +141,7 @@ def test_mul_tosa_BI( test_data = (input_, other_) self._test_mul_tosa_BI_pipeline(self.Mul(), test_data) - # Expected to fail since RESCALE cannot be fused with MUL in Vela. @parameterized.expand(test_data_sute) - @unittest.expectedFailure def test_mul_u55_BI( self, test_name: str, diff --git a/backends/arm/test/ops/test_sigmoid.py b/backends/arm/test/ops/test_sigmoid.py index 7a0435689f4..369019774fa 100644 --- a/backends/arm/test/ops/test_sigmoid.py +++ b/backends/arm/test/ops/test_sigmoid.py @@ -145,8 +145,6 @@ def test_sigmoid_add_sigmoid_tosa_BI(self): self.SigmoidAddSigmoid(), (test_data_suite[4][1], test_data_suite[3][1]) ) - # Fails due to Vela diff from Tosa spec, expected to work with Regor. @parameterized.expand(test_data_suite) - @unittest.expectedFailure def test_sigmoid_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): self._test_sigmoid_tosa_u55_BI_pipeline(self.Sigmoid(), (test_data,)) diff --git a/backends/arm/test/ops/test_sub.py b/backends/arm/test/ops/test_sub.py index 2ae7c3ab36f..0a9f159f365 100644 --- a/backends/arm/test/ops/test_sub.py +++ b/backends/arm/test/ops/test_sub.py @@ -104,9 +104,7 @@ def test_sub_tosa_BI(self, test_data: torch.Tensor): test_data = (test_data,) self._test_sub_tosa_BI_pipeline(self.Sub(), test_data) - # Expected to fail since RESCALE cannot be fused with SUB in Vela. @parameterized.expand(Sub.test_parameters) - @unittest.expectedFailure def test_sub_u55_BI(self, test_data: torch.Tensor): test_data = (test_data,) self._test_sub_u55_BI_pipeline(self.Sub(), test_data) diff --git a/examples/arm/setup.sh b/examples/arm/setup.sh index 9cef98e6227..8c39a3a8668 100755 --- a/examples/arm/setup.sh +++ b/examples/arm/setup.sh @@ -216,7 +216,7 @@ function setup_vela() { if [[ ! -e ethos-u-vela ]]; then git clone https://review.mlplatform.org/ml/ethos-u/ethos-u-vela repo_dir="${root_dir}/ethos-u-vela" - base_rev=7706c1281166e7611f4300ed26338087152a33c9 + base_rev=d362f5443f67b1e6213a9d8f124edff758efac96 patch_repo fi cd "${root_dir}/ethos-u-vela"