diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py index f1b97625728..7ebf89e3927 100644 --- a/backends/arm/test/common.py +++ b/backends/arm/test/common.py @@ -145,14 +145,3 @@ def get_u85_compile_spec_unbuilt( .dump_intermediate_artifacts_to(artifact_path) ) return compile_spec - - -def get_target_board(compile_spec: list[CompileSpec]) -> str | None: - for spec in compile_spec: - if spec.key == "compile_flags": - flags = spec.value.decode() - if "u55" in flags: - return "corstone-300" - elif "u85" in flags: - return "corstone-320" - return None diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py index d29695dedf3..509690dd2fb 100644 --- a/backends/arm/test/models/test_mobilenet_v2_arm.py +++ b/backends/arm/test/models/test_mobilenet_v2_arm.py @@ -98,7 +98,7 @@ def test_mv2_u55_BI(self): ) if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs( - atol=1.0, qtol=1, inputs=self.model_inputs, target_board="corstone-300" + atol=1.0, qtol=1, inputs=self.model_inputs ) @pytest.mark.slow @@ -118,5 +118,5 @@ def test_mv2_u85_BI(self): ) if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs( - atol=1.0, qtol=1, inputs=self.model_inputs, target_board="corstone-320" + atol=1.0, qtol=1, inputs=self.model_inputs ) diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py index e3502baf2c7..71d9feca8bf 100644 --- a/backends/arm/test/ops/test_max_pool.py +++ b/backends/arm/test/ops/test_max_pool.py @@ -173,9 +173,7 @@ def test_maxpool2d_tosa_u55_BI( (test_data,), ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=(test_data,), target_board="corstone-300" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) @parameterized.expand(test_data_suite) @pytest.mark.corstone_fvp @@ -191,9 +189,7 @@ def test_maxpool2d_tosa_u85_BI( (test_data,), ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=(test_data,), target_board="corstone-320" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) @parameterized.expand(test_data_suite_mult_batches) def test_maxpool2d_tosa_MI_mult_batches( @@ -232,9 +228,7 @@ def test_maxpool2d_tosa_u55_BI_mult_batches( (test_data,), ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=(test_data,), target_board="corstone-300" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) @parameterized.expand(test_data_suite_mult_batches) @pytest.mark.corstone_fvp @@ -251,6 +245,4 @@ def test_maxpool2d_tosa_u85_BI_mult_batches( (test_data,), ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=(test_data,), target_board="corstone-320" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) diff --git a/backends/arm/test/ops/test_maximum.py b/backends/arm/test/ops/test_maximum.py index c6280fafbbd..a365642b450 100644 --- a/backends/arm/test/ops/test_maximum.py +++ b/backends/arm/test/ops/test_maximum.py @@ -121,9 +121,7 @@ def test_maximum_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): self.Maximum(), common.get_u55_compile_spec(), test_data ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=test_data, target_board="corstone-300" - ) + 
tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) @parameterized.expand(Maximum.test_parameters) def test_maximum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): @@ -132,6 +130,4 @@ def test_maximum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): self.Maximum(), common.get_u85_compile_spec(), test_data ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=test_data, target_board="corstone-320" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) diff --git a/backends/arm/test/ops/test_minimum.py b/backends/arm/test/ops/test_minimum.py index bed0484df7a..c1a526fb5f9 100644 --- a/backends/arm/test/ops/test_minimum.py +++ b/backends/arm/test/ops/test_minimum.py @@ -121,9 +121,7 @@ def test_minimum_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): self.Minimum(), common.get_u55_compile_spec(), test_data ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=test_data, target_board="corstone-300" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) @parameterized.expand(Minimum.test_parameters) def test_minimum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): @@ -133,5 +131,6 @@ def test_minimum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): ) if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs( - qtol=1, inputs=test_data, target_board="corstone-320" + qtol=1, + inputs=test_data, ) diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index 3851e41b73e..ef779971a8b 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -10,8 +10,8 @@ import shutil import subprocess import tempfile - from pathlib import Path + from typing import cast, Dict, List, Literal, Optional, Tuple import numpy as np @@ -21,13 +21,14 @@ from executorch.backends.arm.test.conftest import is_option_enabled from executorch.backends.arm.tosa_specification import TosaSpecification +from executorch.exir import ExecutorchProgramManager, ExportedProgram +from executorch.exir.backend.compile_spec_schema import CompileSpec from executorch.exir.lowered_backend_module import LoweredBackendModule - from packaging.version import Version -from torch.export import ExportedProgram from torch.fx.node import Node from torch.overrides import TorchFunctionMode +from torch.testing._internal.common_utils import torch_to_numpy_dtype_dict from tosa import TosaGraph logger = logging.getLogger(__name__) @@ -55,7 +56,7 @@ def __init__( self.dtype = dtype -def _get_input_names(program: ExportedProgram) -> list[str]: +def get_input_names(program: ExportedProgram) -> list[str]: """ Get a list[str] with the names of the inputs to this model. @@ -76,7 +77,7 @@ def _get_input_names(program: ExportedProgram) -> list[str]: return input_names -def _get_input_quantization_params( +def get_input_quantization_params( program: ExportedProgram, ) -> list[QuantizationParams]: """ @@ -85,12 +86,10 @@ def _get_input_quantization_params( program (ExportedProgram): The program to get input quantization parameters from. Returns: list[QuantizationParams]: The found quantization parameters. - Raises: - RuntimeError if no quantization parameters are found. 
""" quant_params = [] - input_names = _get_input_names(program) + input_names = get_input_names(program) num_inputs = len(input_names) for node in program.graph.nodes: if ( @@ -115,7 +114,7 @@ def _get_input_quantization_params( return quant_params -def _get_output_nodes(program: ExportedProgram) -> list[Node]: +def get_output_nodes(program: ExportedProgram) -> list[Node]: """ Get output node to this model. @@ -135,33 +134,32 @@ def _get_output_nodes(program: ExportedProgram) -> list[Node]: return output_nodes -def _get_output_quantization_params( +def get_output_quantization_params( output_nodes: list[Node], -) -> List[QuantizationParams]: +) -> dict[Node, QuantizationParams | None]: """ Get output QuantizationParams from a program. Args: output_nodes (list(Node)): A list of output nodes to get output quantization parameters from. Returns: - QuantizationParams: The found quantization parameters. + dictionary mapping the output nodes to the found quantization parameters. + If no quantization parameters were found, the entry is None. Raises: RuntimeError if no output quantization parameters are found. """ - quant_params = [] + quant_params = {} for node in output_nodes: if node.target == torch.ops.quantized_decomposed.dequantize_per_tensor.default: - quant_params.append( - QuantizationParams( - node_name=node.args[0].name, - scale=node.args[1], - zp=node.args[2], - qmin=node.args[3], - qmax=node.args[4], - dtype=node.args[5], - ) + quant_params[node] = QuantizationParams( + node_name=node.args[0].name, + scale=node.args[1], + zp=node.args[2], + qmin=node.args[3], + qmax=node.args[4], + dtype=node.args[5], ) - if len(quant_params) == 0: - raise RuntimeError("No Quantization parameters not found in exported model.") + else: + quant_params[node] = None return quant_params @@ -177,7 +175,7 @@ def _tosa_dispatch(self, lowered_backend_module: LoweredBackendModule, inputs): ) tosa_version = get_tosa_version(compile_specs) - return run_tosa_graph_static(tosa_buffer, tosa_version, inputs) + return run_tosa_graph(tosa_buffer, tosa_version, inputs) def __torch_function__(self, func, types, args=..., kwargs=None): if isinstance(func, torch._higher_order_ops.executorch_call_delegate.ExecutorchCallDelegate): # type: ignore @@ -193,105 +191,65 @@ def __torch_function__(self, func, types, args=..., kwargs=None): return func(*args, **kwargs) -""" -A class to store parameters needed for running programs, either in tosa or .pte format. -""" - - -class RunnerUtil: - def __init__( - self, - intermediate_path: str, - tosa_ref_model_path: Optional[str] = None, - ): - self.intermediate_path = intermediate_path - self.tosa_ref_model_path = tosa_ref_model_path or "tosa_reference_model" - assert self.intermediate_path is None or os.path.exists( - self.intermediate_path - ), f"TOSA artifact path don't exist! 
Path: {self.intermediate_path}"
-
-        self.is_quantized: bool = False
-        self.input_names: list[str] = None
-        self.output_name: str = None
-        self.qp_input: list[QuantizationParams] = None
-        self.qp_output: list[QuantizationParams] = None
-        self.timeout = 480
-        self.target_board: str = None
-
-        self._has_init_run = False
-
-    def init_run(
-        self,
-        exported_program: ExportedProgram,
-        edge_program: ExportedProgram,
-        is_quantized: bool,
-        target_board: str,
-    ):
-
-        self.input_names = _get_input_names(edge_program)
-        self.output_nodes = _get_output_nodes(exported_program)
-
-        self.is_quantized = is_quantized
-        self.target_board = target_board
-
-        if is_quantized:
-            self.qp_input = _get_input_quantization_params(exported_program)
-            self.qp_output = _get_output_quantization_params(self.output_nodes)
-        else:
-            self.qp_input = [None] * len(self.input_names)
-            self.qp_output = [None] * len(self.output_nodes)
-
-        self._has_init_run = True
-
-    def set_timeout(self, timeout: int):
-        self.timeout = timeout
-
-    def run_corstone(
-        self,
-        inputs: Tuple[torch.Tensor],
-    ) -> list[torch.Tensor]:
-
-        assert (
-            self._has_init_run
-        ), "RunnerUtil needs to be initialized using init_run() before running Corstone FVP."
-        if self.target_board not in ["corstone-300", "corstone-320"]:
-            raise RuntimeError(f"Unknown target board: {self.target_board}")
-
-        pte_path = os.path.join(self.intermediate_path, "program.pte")
-        assert os.path.exists(pte_path), f"Pte path '{pte_path}' not found."
-
-        for input_name, quant_param, data in zip(
-            self.input_names, self.qp_input, inputs
-        ):
-            save_bytes(self.intermediate_path, data, False, input_name, quant_param)
-
-        out_path = os.path.join(self.intermediate_path, "out")
-
-        input_paths = []
-        for name in self.input_names:
-            input_paths.append(
-                os.path.join(self.intermediate_path, f"{name}.bin"),
-            )
-        elf_path = os.path.join(
-            "cmake-out",
-            f"arm_semihosting_executor_runner_{self.target_board}",
-            "arm_executor_runner",
-        )
-        assert os.path.exists(
-            elf_path
-        ), f"Did not find build arm_executor_runner in path {elf_path}, run setup_testing.sh?"
-
-        cmd_line = f"executor_runner -m {pte_path} -o {out_path}"
-
-        for input_path in input_paths:
-            cmd_line += f" -i {input_path}"
-
-        ethos_u_extra_args = ""
-        if is_option_enabled("fast_fvp"):
-            ethos_u_extra_args = ethos_u_extra_args + "--fast"
+def run_corstone(
+    executorch_program_manager: ExecutorchProgramManager,
+    inputs: Tuple[torch.Tensor],
+    intermediate_path: str | Path,
+    target_board: Literal["corstone-300", "corstone-320"],
+    elf_path: str | Path,
+    timeout: int = 120,  # s
+) -> tuple[torch.Tensor, ...]:
+    """Executes an inference of the executorch program on the FVP.
+    Returns a tuple of tensors with the output.
+    Args:
+        `executorch_program_manager`: the executorch program to run.
+            The output of an EdgeProgramManager.to_executorch() call.
+        `inputs`: A list of tensors with the inputs of the inference.
+        `intermediate_path`: A directory where the .pte and inputs are saved to file.
+            The output tensors are saved in `intermediate_path`/out.
+        `target_board`: Which FVP to run: corstone-300 or corstone-320.
+        `elf_path`: The path to the runtime elf. Needs to have semihosting enabled
+            and match the target_board.
+        `timeout`: The timeout until the FVP terminates the elf, in seconds.
+    Limitations:
+        Relies on the output tensors from the exported program
+        to figure out the shape and dtype of the buffer that was
+        output from the FVP.
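+    Example (illustrative sketch; `epm` is assumed to be the result of an
+    EdgeProgramManager.to_executorch() call, and the elf to be the runner
+    built by setup_testing.sh):
+        outputs = run_corstone(
+            epm,
+            inputs=(torch.randn(1, 3, 224, 224),),
+            intermediate_path="arm_test/intermediates",
+            target_board="corstone-300",
+            elf_path="cmake-out/arm_semihosting_executor_runner_corstone-300/arm_executor_runner",
+        )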
+ """ - command_args = { - "corstone-300": [ + exported_program = executorch_program_manager.exported_program() + intermediate_path = Path(intermediate_path) + intermediate_path.mkdir(exist_ok=True) + elf_path = Path(elf_path) + if not elf_path.exists(): + raise FileNotFoundError(f"Did not find elf file {elf_path}") + + # Save pte to file + pte_path = os.path.join(intermediate_path, "program.pte") + with open(pte_path, "wb") as f: + f.write(executorch_program_manager.buffer) + + # Save inputs to file + input_names = get_input_names(exported_program) + input_paths = [] + for input_name, input_ in zip(input_names, inputs): + input_path = save_bytes(intermediate_path, input_, input_name) + input_paths.append(input_path) + + out_path = os.path.join(intermediate_path, "out") + + cmd_line = f"executor_runner -m {pte_path} -o {out_path}" + for input_path in input_paths: + cmd_line += f" -i {input_path}" + + ethos_u_extra_args = "" + if is_option_enabled("fast_fvp"): + ethos_u_extra_args = ethos_u_extra_args + "--fast" + + match target_board: + case "corstone-300": + command_args = [ "FVP_Corstone_SSE-300_Ethos-U55", "-C", "ethosu.num_macs=128", @@ -314,9 +272,10 @@ def run_corstone( "-a", elf_path, "--timelimit", - f"{self.timeout}", - ], - "corstone-320": [ + f"{timeout}", + ] + case "corstone-320": + command_args = [ "FVP_Corstone_SSE-320", "-C", "mps4_board.subsystem.ethosu.num_macs=128", @@ -345,212 +304,44 @@ def run_corstone( "-a", elf_path, "--timelimit", - f"{self.timeout}", - ], - } - - result = _run_cmd(command_args[self.target_board], check=False) - if result.returncode != 0: - raise RuntimeError( - f"Failed to run {command_args[self.target_board]}\nOutput:\n{result.stdout.decode()}\nError: {result.stderr.decode()}" - ) - result_stdout = result.stdout.decode() + f"{timeout}", + ] + case _: + raise ValueError(f"Unknown target board {target_board}") - error_regex = r"(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)" + result = _run_cmd(command_args) - # Check for errors in the output - # regex to check for error or fault messages in stdout from FVP - if re.compile(error_regex, re.MULTILINE).search(result_stdout): - raise RuntimeError( - f"Corstone simulation failed:\ncmd: {command_args[self.target_board]}\n, log: \n {result_stdout}\n{result.stderr.decode()}" - ) - output_np = [] - for i, node in enumerate(self.output_nodes): - tosa_ref_output = np.fromfile( - os.path.join(self.intermediate_path, f"out-{i}.bin"), dtype=np.float32 - ) - output_shape = node.meta["val"].shape - output_np.append(torch.from_numpy(tosa_ref_output).reshape(output_shape)) - return tuple(output_np) - - def run_tosa_graph( - self, graph: TosaGraph, inputs: list[np.ndarray] | list[torch.Tensor] - ) -> torch.Tensor: - """Runs the TOSA reference model with inputs and returns the result.""" - data_np = [ - prep_data_for_save( - input, self.is_quantized, self.input_names[i], self.qp_input[i] - ) - for i, input in enumerate(inputs) - ] - # tosa_profile: 0 = Base Inference, 1 = Main Inference, 2 = Main Training. 
-        tosa_profile = 0 if self.is_quantized else 1
-        debug_mode = "ALL" if logger.level <= logging.DEBUG else None
-        outputs, status = tosa_reference_model.run(
-            graph,
-            data_np,
-            verbosity=_tosa_refmodel_loglevel(logger.level),
-            tosa_profile=tosa_profile,
-            initialize_variable_tensor_from_numpy=1,  # True
-            debug_mode=debug_mode,
+    # Regex to check for error or fault messages in stdout from FVP
+    result_stdout = result.stdout.decode()
+    error_regex = r"(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)"
+    if re.compile(error_regex, re.MULTILINE).search(result_stdout):
+        raise RuntimeError(
+            f"Corstone simulation failed:\ncmd: {' '.join(str(arg) for arg in command_args)}\nlog: \n {result_stdout}\n{result.stderr.decode()}"
         )
-        assert (
-            status == tosa_reference_model.GraphStatus.TOSA_VALID
-        ), "Non-valid TOSA given to reference model."
-
-        outputs_torch = []
-        for output in outputs:
-            output = torch.from_numpy(output)
-            if self.is_quantized:
-                # Need to dequant back to FP32 for comparison with torch output
-                quant_param = self.qp_output
-                assert (
-                    quant_param is not None
-                ), "There are no quantization parameters, check output parameters"
-                output = (output.to(torch.float32) - quant_param.zp) * quant_param.scale
-            outputs_torch.append(output)
-        return tuple(outputs_torch)
-
-    def run_tosa_ref_model(
-        self,
-        inputs: Tuple[torch.Tensor],
-    ) -> list[torch.Tensor]:
-        """
-        Run TOSA reference model using the tosa_reference_model program.
-
-        In order to do that we need:
-        1. desc.json, which points to files needed by tosa_reference_model.
-        2. output.tosa, which is the TOSA buffer that describes the model we're
-           trying to run.
-
-        These two files are created by arm_backend.py as part of partition stage
-
-        All these files are saved on disk in self.intermediate_path.
-
-        Args:
-            inputs (Tuple[torch.Tensor]): The input data to run the TOSA
-
-        Returns:
-            torch.Tensor: The output of the TOSA reference model, as a torch
-                tensor.
-
-        Here's a sample desc.json file:
-        {
-            "tosa_file": "output.tosa",
-            "ifm_name": [
-                "arg0_1"
-            ],
-            "ifm_file": [
-                "arg0_1.npy"
-            ],
-            "ofm_name": [
-                "quantized_decomposed_dequantize_per_tensor_default_1"
-            ],
-            "ofm_file": [
-                "ref-quantized_decomposed_dequantize_per_tensor_default_1.npy"
-            ],
-            "expected_return_code": 0,
-            "expected_failure": false
-        }
-
-        Todo:
-            * It would be nice to not rely on files on disk. Should be possible
-              as a next step. See:
-              https://review.mlplatform.org/plugins/gitiles/tosa/reference_model/#executable-usage
-        """
-
-        assert (
-            self._has_init_run
-        ), "RunnerUtil needs to be initialized using init_run() before running tosa reference."
-
-        all_desc_file_paths = [
-            str(path) for path in Path(self.intermediate_path).glob("desc*.json")
-        ]
-        assert (
-            all_desc_file_paths
-        ), f"No TOSA description file found in '{self.intermediate_path}'."
-        if len(all_desc_file_paths) != 1:
-            raise NotImplementedError(
-                "Graphs with more than one partition are currently not supported."
-            )
-
-        desc_file_path = all_desc_file_paths[0]
-        assert os.path.exists(
-            desc_file_path
-        ), f"desc_file_path: {desc_file_path} does not exist"
-
-        # Save the input data to disk as a .npy file, since that's what the TOSA
-        # reference model expects. 
Name of the file must match the name in - # desc.json, which is the tensor name from the graph + .npy - for input_name, quant_param, data in zip( - self.input_names, self.qp_input, inputs, strict=True - ): - save_npy( - self.intermediate_path, data, self.is_quantized, input_name, quant_param - ) + output_nodes = get_output_nodes(exported_program) + output_np = [] + for i, node in enumerate(output_nodes): + output_shape = node.meta["val"].shape + output_dtype = node.meta["val"].dtype + tosa_ref_output = np.fromfile( + os.path.join(intermediate_path, f"out-{i}.bin"), + torch_to_numpy_dtype_dict[output_dtype], + ) - # Run the TOSA reference model via command line, this will produce a - # .npy file with the result (aka OFM). - assert ( - shutil.which(self.tosa_ref_model_path) is not None - ), f"tosa_reference_model tool not found, did you run examples/arm/setup.sh? Path: {self.tosa_ref_model_path}" - - cmd_ref_model = [ - self.tosa_ref_model_path, - "--test_desc", - desc_file_path, - "-l", - _tosa_refmodel_loglevel(logger.level), - ] - _run_cmd(cmd_ref_model) - - # Load desc.json, just to get the name of the output file above - with open(desc_file_path) as f: - desc_json = json.load(f) - - tosa_ref_outputs = [] - for ofm_file in desc_json["ofm_file"]: - ofm_file_npy = os.path.join(self.intermediate_path, ofm_file) - - # Load the output file (OFM) and return it as a numpy array - tosa_ref_output = np.load(ofm_file_npy) - - if self.is_quantized: - # Need to dequant back to FP32 for comparison with torch output - # Convert to int32 prior to dequantize the output - if tosa_ref_output.dtype == np.int8: - tosa_ref_output = tosa_ref_output.astype(np.int32) - quant_param = self.qp_output - if quant_param is not None: - # I.e. bool output is possible for quantized models - tosa_ref_output = ( - tosa_ref_output - quant_param.zp - ) * quant_param.scale - - if tosa_ref_output.dtype == np.double: - tosa_ref_output = tosa_ref_output.astype("float32") - elif tosa_ref_output.dtype == bool: - # retain the bool output though for boolean related comparisons - tosa_ref_output = tosa_ref_output.astype("bool") - - # tosa_output is a numpy array, convert to torch tensor for comparison - tosa_ref_outputs.append(torch.from_numpy(tosa_ref_output)) - - return tosa_ref_outputs + output_np.append(torch.from_numpy(tosa_ref_output).reshape(output_shape)) + return tuple(output_np) def prep_data_for_save( data: torch.Tensor, - is_quantized: bool, input_name: str, - quant_param: QuantizationParams, + quant_param: Optional[QuantizationParams] = None, ): data_np = np.array(data.detach(), order="C").astype( - f"{data.dtype}".replace("torch.", "") + torch_to_numpy_dtype_dict[data.dtype] ) - - if is_quantized: + if quant_param is not None: assert quant_param.node_name in input_name, ( f"The quantization params name '{quant_param.node_name}' does not " f"match the input tensor name '{input_name}'." @@ -569,22 +360,20 @@ def prep_data_for_save( def save_npy( path: str, data, - is_quantized: bool, input_name: str, - quant_param: QuantizationParams, + quant_param: Optional[QuantizationParams] = None, ) -> str: """Serializes and saves 'data' as a .npy file, possibly quantizing it before. Parameters: path: the directory where to save the data. data: the data to save. - is_quantized: whether to quantize the data before saving it. input_name: the name of the file, without file-ending. quant_param: the parameters to use for quantization. Returns: the full file path of the output. 
""" - data_np = prep_data_for_save(data, is_quantized, input_name, quant_param) + data_np = prep_data_for_save(data, input_name, quant_param) file_path = os.path.join(path, input_name + ".npy") np.save(file_path, data_np, allow_pickle=False) @@ -594,22 +383,20 @@ def save_npy( def save_bytes( path: str, data, - is_quantized: bool, input_name: str, - quant_param: QuantizationParams, + quant_param: Optional[QuantizationParams] = None, ) -> str: """Serializes and saves 'data' in byte format, possibly quantizing it before. Parameters: path: the directory where to save the data. data: the data to save. - is_quantized: whether to quantize the data before saving it. input_name: the name of the file, without file-ending. quant_param: the parameters to use for quantization. Returns: the full file path of the output. """ - data_np = prep_data_for_save(data, is_quantized, input_name, quant_param) + data_np = prep_data_for_save(data, input_name, quant_param) file_path = os.path.join(path, input_name + ".bin") with open(file_path, "w+b") as f: data_np_bytes = data_np.tobytes() @@ -705,7 +492,7 @@ def _tosa_refmodel_loglevel(loglevel: int) -> str: return loglevel_map[clamped_logging_level] -def run_tosa_graph_static( +def run_tosa_graph( graph: TosaGraph, tosa_version: TosaSpecification, inputs: list[torch.Tensor], @@ -740,11 +527,25 @@ def run_tosa_graph_static( def transpose_data_format(data: list[np.ndarray], to: Literal["NHWC", "NCHW"]): - if to == "NCHW": - dim_order = (0, 3, 1, 2) - if to == "NHWC": - dim_order = (0, 2, 3, 1) + match to: + case "NCHW": + dim_order = (0, 3, 1, 2) + case "NHWC": + dim_order = (0, 2, 3, 1) + case _: + raise NotImplementedError(f"Cant transpose to dim order {to}") for i in range(len(data)): if hasattr(data[i], "shape") and len(data[i].shape) == 4: # Copy is needed to force actual data conversion, not setting stride. 
             data[i] = np.transpose(data[i], dim_order).copy()
+
+
+def get_target_board(compile_spec: list[CompileSpec]) -> str | None:
+    for spec in compile_spec:
+        if spec.key == "compile_flags":
+            flags = spec.value.decode()
+            if "u55" in flags:
+                return "corstone-300"
+            elif "u85" in flags:
+                return "corstone-320"
+    return None
diff --git a/backends/arm/test/tester/analyze_output_utils.py b/backends/arm/test/tester/analyze_output_utils.py
index 477a96652fe..3436bfe618a 100644
--- a/backends/arm/test/tester/analyze_output_utils.py
+++ b/backends/arm/test/tester/analyze_output_utils.py
@@ -7,10 +7,11 @@
 import tempfile
 
 import torch
+from executorch.backends.arm.arm_backend import get_intermediate_path
 from executorch.backends.arm.test.runner_utils import (
-    _get_input_quantization_params,
-    _get_output_nodes,
-    _get_output_quantization_params,
+    get_input_quantization_params,
+    get_output_nodes,
+    get_output_quantization_params,
 )
 from executorch.backends.xnnpack.test.tester.tester import Export, Quantize
 
@@ -220,7 +221,7 @@ def dump_error_output(
     # Capture assertion error and print more info
     banner = "=" * 40 + "TOSA debug info" + "=" * 40
     logger.error(banner)
-    path_to_tosa_files = tester.runner_util.intermediate_path
+    path_to_tosa_files = get_intermediate_path(tester.compile_spec)
 
     if path_to_tosa_files is None:
         path_to_tosa_files = tempfile.mkdtemp(prefix="executorch_result_dump_")
@@ -228,9 +229,9 @@
     export_stage = tester.stages.get(tester.stage_name(Export), None)
     quantize_stage = tester.stages.get(tester.stage_name(Quantize), None)
     if export_stage is not None and quantize_stage is not None:
-        output_nodes = _get_output_nodes(export_stage.artifact)
-        qp_input = _get_input_quantization_params(export_stage.artifact)
-        qp_output = _get_output_quantization_params(output_nodes)
+        output_nodes = get_output_nodes(export_stage.artifact)
+        qp_input = get_input_quantization_params(export_stage.artifact)
+        qp_output = get_output_quantization_params(output_nodes)
         logger.error(f"Input QuantArgs: {qp_input}")
         logger.error(f"Output QuantArgs: {qp_output}")
 
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index 5b2f9201fc5..2c11cedae11 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -5,6 +5,7 @@
 
 import logging
+import os
 
 from collections import Counter
 from pprint import pformat
 from typing import Iterable, List, Optional, Tuple, Union
@@ -22,10 +23,12 @@
     ArmQuantizer,
     get_symmetric_quantization_config,
 )
-from executorch.backends.arm.test.common import get_target_board
 from executorch.backends.arm.test.runner_utils import (
     dbg_tosa_fb_to_json,
-    RunnerUtil,
+    get_output_nodes,
+    get_output_quantization_params,
+    get_target_board,
+    run_corstone,
     TosaReferenceModelDispatch,
 )
 
@@ -46,6 +49,7 @@
 from tabulate import tabulate
 from torch.export.graph_signature import ExportGraphSignature, InputSpec, OutputSpec
 from torch.fx import Graph
+from torch.utils._pytree import tree_flatten
 
 logger = logging.getLogger(__name__)
 
@@ -109,18 +113,43 @@ def dump_artifact(self, path_to_dump: Optional[str]):
 
 
 class Serialize(tester.Serialize):
-    def __init__(self, runner_util: RunnerUtil, timeout: int = 1):
+    def __init__(self, compile_spec: list[CompileSpec], timeout: int):
         super().__init__()
-        self.runner = runner_util
-        self.runner.set_timeout(timeout)
+        self.timeout = timeout
+        self.executorch_program_manager: ExecutorchProgramManager | None = None
+        self.compile_spec = compile_spec
+
+    def run(self, artifact: 
ExecutorchProgramManager, inputs=None) -> None: + super().run(artifact, inputs) + # Keep the entire ExecutorchProgramManager for execution. + self.executorch_program_manager = artifact def run_artifact(self, inputs): - return self.runner.run_corstone(inputs) + if self.executorch_program_manager is None: + raise RuntimeError( + "Tried running artifact from Serialize stage without running the stage." + ) + inputs_flattened, _ = tree_flatten(inputs) + intermediate_path = get_intermediate_path(self.compile_spec) + target_board = get_target_board(self.compile_spec) + elf_path = os.path.join( + "cmake-out", + f"arm_semihosting_executor_runner_{target_board}", + "arm_executor_runner", + ) + if not os.path.exists(elf_path): + raise FileNotFoundError( + f"Did not find build arm_executor_runner in path {elf_path}, run setup_testing.sh?" + ) - def dump_artifact(self, path_to_dump: Optional[str]): - if not path_to_dump: - path_to_dump = self.path + "/program.pte" - super().dump_artifact(path_to_dump) + return run_corstone( + self.executorch_program_manager, + inputs_flattened, + intermediate_path, + target_board, + elf_path, + self.timeout, + ) class ToExecutorch(tester.ToExecutorch): @@ -156,8 +185,7 @@ def __init__( self, model: torch.nn.Module, example_inputs: Tuple[torch.Tensor], - compile_spec: List[CompileSpec] = None, - tosa_ref_model_path: str | None = None, + compile_spec: List[CompileSpec], ): """ Args: @@ -166,13 +194,6 @@ def __init__( compile_spec (List[CompileSpec]): The compile spec to use """ - # Initiate runner_util - intermediate_path = get_intermediate_path(compile_spec) - self.runner_util = RunnerUtil( - intermediate_path=intermediate_path, - tosa_ref_model_path=tosa_ref_model_path, - ) - self.compile_spec = compile_spec super().__init__(model, example_inputs) self.pipeline[self.stage_name(InitialModel)] = [ @@ -245,16 +266,12 @@ def serialize( self, serialize_stage: Optional[Serialize] = None, timeout: int = 480 ): if serialize_stage is None: - serialize_stage = Serialize(self.runner_util, timeout=timeout) + serialize_stage = Serialize(self.compile_spec, timeout) assert ( get_intermediate_path(self.compile_spec) is not None ), "Can't dump serialized file when compile specs do not contain an artifact path." - return ( - super() - .serialize(serialize_stage) - .dump_artifact(get_intermediate_path(self.compile_spec) + "/program.pte") - ) + return super().serialize(serialize_stage) def is_quantized(self) -> bool: return self.stages[self.stage_name(tester.Quantize)] is not None @@ -263,7 +280,6 @@ def run_method_and_compare_outputs( self, inputs: Optional[Tuple[torch.Tensor]] = None, stage: Optional[str] = None, - target_board: Optional[str] = None, num_runs=1, atol=1e-03, rtol=1e-03, @@ -287,9 +303,6 @@ def run_method_and_compare_outputs( edge_stage = self.stages[self.stage_name(tester.ToEdge)] if edge_stage is None: edge_stage = self.stages[self.stage_name(tester.ToEdgeTransformAndLower)] - assert ( - self.runner_util is not None - ), "self.tosa_test_util is not initialized, cannot use run_method()" assert ( edge_stage is not None ), "To compare outputs, at least the ToEdge or ToEdgeTransformAndLower stage needs to be run." 
@@ -298,29 +311,19 @@
         test_stage = self.stages[stage]
         is_quantized = self.is_quantized()
 
-        if target_board is None:
-            target_board = get_target_board(self.compile_spec)
-
-        exported_program = self.stages[self.stage_name(tester.Export)].artifact
-        edge_program = edge_stage.artifact.exported_program()
-
-        self.runner_util.init_run(
-            exported_program,
-            edge_program,
-            is_quantized,
-            target_board,
-        )
-
         if is_quantized:
             reference_stage = self.stages[self.stage_name(tester.Quantize)]
-            # bool output is quantized with none quantized output so allow
-            # self.runner_util.qp_output to be none
-            if self.runner_util.qp_output is not None:
-                quantization_scales = [qp.scale for qp in self.runner_util.qp_output]
         else:
-            quantization_scales = [None] * len(self.runner_util.output_nodes)
             reference_stage = self.stages[self.stage_name(InitialModel)]
 
+        exported_program = self.stages[self.stage_name(tester.Export)].artifact
+        output_nodes = get_output_nodes(exported_program)
+        output_qparams = get_output_quantization_params(output_nodes)
+
+        quantization_scales = [
+            getattr(qp, "scale", None) for qp in output_qparams.values()
+        ]
+
         logger.info(
             f"Comparing Stage '{self.stage_name(test_stage)}' with Stage '{self.stage_name(reference_stage)}'"
         )