From 674b125a8f1950ca8d9382f2dbefab07ecca120b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Sat, 11 Jan 2025 11:24:37 +0100 Subject: [PATCH 1/6] Arm: Add initial Llama model test case Adds Llama model test case for TOSA-0.80+MI. Handles Add and Mul where inputs have different ranks. New unit test parameters --llama_inputs added, without it test will be skipped. Tested with smaller stories, see examples/models/llama/UTILS.md. Adds get_llama_model() to export_llama_lib used in test case. Change-Id: I003bbcee8f0cc35193d793a4af9b031453114e71 --- backends/arm/operator_support/__init__.py | 1 + .../operator_support/slice_copy_support.py | 39 ++++++ .../tosa_supported_operators.py | 2 - backends/arm/operators/op_add.py | 19 ++- backends/arm/operators/op_mul.py | 26 +++- backends/arm/operators/op_slice.py | 6 +- backends/arm/test/conftest.py | 6 + backends/arm/test/models/test_llama_arm.py | 125 ++++++++++++++++++ backends/arm/test/ops/test_add.py | 24 +++- backends/arm/test/ops/test_mul.py | 55 +++++++- backends/arm/test/tester/arm_tester.py | 35 ++++- backends/arm/tosa_utils.py | 39 ++++++ examples/models/llama/export_llama_lib.py | 12 ++ 13 files changed, 366 insertions(+), 23 deletions(-) create mode 100644 backends/arm/operator_support/slice_copy_support.py create mode 100644 backends/arm/test/models/test_llama_arm.py diff --git a/backends/arm/operator_support/__init__.py b/backends/arm/operator_support/__init__.py index c6895cce492..e9fb2eda12e 100644 --- a/backends/arm/operator_support/__init__.py +++ b/backends/arm/operator_support/__init__.py @@ -10,6 +10,7 @@ pool_2d_support, reduce_sum_support, right_shift_support, + slice_copy_support, to_copy_support, tosa_supported_operators, ) diff --git a/backends/arm/operator_support/slice_copy_support.py b/backends/arm/operator_support/slice_copy_support.py new file mode 100644 index 00000000000..1f5ace91cde --- /dev/null +++ b/backends/arm/operator_support/slice_copy_support.py @@ -0,0 +1,39 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + + +import logging + +import torch.fx as fx +from executorch.backends.arm.operator_support.tosa_supported_operators import ( + register_tosa_support_check, + SupportedTOSAOperatorCheck, +) +from executorch.backends.arm.tosa_specification import TosaSpecification +from executorch.backends.arm.tosa_utils import getNodeArgs +from executorch.exir.dialects._ops import ops as exir_ops + +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARNING) + + +@register_tosa_support_check +class SliceCopySupported(SupportedTOSAOperatorCheck): + targets = [exir_ops.edge.aten.slice_copy.Tensor] + + tosa_specs = [ + TosaSpecification.create_from_string("TOSA-0.80+BI"), + TosaSpecification.create_from_string("TOSA-0.80+MI"), + ] + + def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool: # type: ignore[override, misc] + if tosa_spec not in self.tosa_specs: + return False + + inputs = getNodeArgs(node) + if len(inputs) == 5 and (step := inputs[4].number) != 1: + logging.warning(f"{node.target} with step size of {step} not supported.") + return False + return True diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py index dd092968764..fb5509d04ab 100644 --- a/backends/arm/operator_support/tosa_supported_operators.py +++ b/backends/arm/operator_support/tosa_supported_operators.py @@ -62,7 +62,6 @@ def register_tosa_support_check(checker: Type[SupportedTOSAOperatorCheck]): def get_registered_tosa_support_checks( tosa_spec: TosaSpecification, ) -> list[Type[SupportedTOSAOperatorCheck]]: - if tosa_spec not in _tosa_spec_support: raise RuntimeError @@ -125,7 +124,6 @@ def is_node_supported(self, submodules, node: fx.Node) -> bool: exir_ops.edge.aten._softmax.default, exir_ops.edge.aten.select_copy.int, exir_ops.edge.aten._log_softmax.default, - exir_ops.edge.aten.slice_copy.Tensor, exir_ops.edge.aten.sub.Tensor, exir_ops.edge.aten.tanh.default, exir_ops.edge.aten.upsample_nearest2d.vec, diff --git a/backends/arm/operators/op_add.py b/backends/arm/operators/op_add.py index ccdeb2c1bcf..cb14dcb43d8 100644 --- a/backends/arm/operators/op_add.py +++ b/backends/arm/operators/op_add.py @@ -45,6 +45,12 @@ def define_node( # Handle int8 (quantized) and int32 assert inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32] + dim_order = ( + inputs[0].dim_order + if len(inputs[0].shape) > len(inputs[1].shape) + else inputs[1].dim_order + ) + if inputs[0].dtype == ts.DType.INT8: rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32( tosa_graph, inputs, node @@ -61,13 +67,14 @@ def define_node( # output.dtype == ts.DType.INT32 add_output = output + input1, input2 = tutils.reshape_for_broadcast( + tosa_graph, rescaled_inputs, dim_order + ) + # Do the INT32 Add tosa_graph.addOperator( TosaOp.Op().ADD, - [ - rescaled_inputs[0].name, - rescaled_inputs[1].name, - ], + [input1.name, input2.name], [add_output.name], None, ) @@ -108,10 +115,12 @@ def define_node( assert inputs[0].dtype == ts.DType.FP32 assert output.dtype == ts.DType.FP32 + input1, input2 = tutils.reshape_for_broadcast(tosa_graph, inputs) + # MI lowering tosa_graph.addOperator( TosaOp.Op().ADD, - [inputs[0].name, inputs[1].name], + [input1.name, input2.name], [output.name], None, ) diff --git a/backends/arm/operators/op_mul.py b/backends/arm/operators/op_mul.py index ef886de11e8..4fff2c2f9b4 100644 --- a/backends/arm/operators/op_mul.py +++ b/backends/arm/operators/op_mul.py @@ -24,6 +24,7 @@ ) from executorch.backends.arm.tosa_mapping import TosaArg from executorch.backends.arm.tosa_specification import TosaSpecification +from executorch.backends.arm.tosa_utils import reshape_for_broadcast from serializer.tosa_serializer import TosaOp @@ -43,6 +44,12 @@ def define_node( output: TosaArg, ) -> None: assert inputs[0].dtype == inputs[1].dtype == output.dtype == ts.DType.INT8 + + dim_order = ( + inputs[0].dim_order + if len(inputs[0].shape) > len(inputs[1].shape) + else inputs[1].dim_order + ) input_A = inputs[0] input_B = inputs[1] input_qparams = get_input_qparams(node) # pyre-ignore[16] @@ -68,15 +75,21 @@ def define_node( output_shape = tutils.tosa_shape(output.shape, output.dim_order) mul_output = tosa_graph.addIntermediate(output_shape, ts.DType.INT32) + input1, input2 = tutils.reshape_for_broadcast( + tosa_graph, + [ + input_A_rescaled, + input_B_rescaled, + ], + dim_order, + ) + # Do the INT32 Mul attr = ts.TosaSerializerAttribute() attr.MulAttribute(shift=0) tosa_graph.addOperator( TosaOp.Op().MUL, - [ - input_A_rescaled.name, - input_B_rescaled.name, - ], + [input1.name, input2.name], [mul_output.name], attr, ) @@ -101,8 +114,11 @@ def define_node( ) -> None: if inputs[0].dtype == ts.DType.INT8: return super().define_node(node, tosa_graph, inputs, output) + + input1, input2 = reshape_for_broadcast(tosa_graph, inputs) + attr = ts.TosaSerializerAttribute() attr.MulAttribute(shift=0) tosa_graph.addOperator( - TosaOp.Op().MUL, [inputs[0].name, inputs[1].name], [output.name], attr + TosaOp.Op().MUL, [input1.name, input2.name], [output.name], attr ) diff --git a/backends/arm/operators/op_slice.py b/backends/arm/operators/op_slice.py index 7f4804af587..41d7a83235c 100644 --- a/backends/arm/operators/op_slice.py +++ b/backends/arm/operators/op_slice.py @@ -32,9 +32,11 @@ def define_node( output: TosaArg, ) -> None: + # See slice_copy_support.py + assert len(inputs) == 4 or (len(inputs) == 5 and inputs[4].number == 1) + # aten.slice_copy supports slicing in 1d at a time. - # The arguments are dimension of slicing, start index and end index. - assert len(inputs) == 4 + # The arguments are the actual input, dimension of slicing, start index, end index and optinal step or stride. input_node, dim, start, end = inputs # Translate and check parameters in Pytorch dim order. diff --git a/backends/arm/test/conftest.py b/backends/arm/test/conftest.py index 081d499d4d5..2038e96e60f 100644 --- a/backends/arm/test/conftest.py +++ b/backends/arm/test/conftest.py @@ -36,6 +36,7 @@ def pytest_configure(config): ) pytest._test_options["corstone_fvp"] = True # type: ignore[attr-defined] pytest._test_options["fast_fvp"] = config.option.fast_fvp # type: ignore[attr-defined] + pytest._test_options["llama_inputs"] = config.option.llama_inputs # type: ignore[attr-defined] logging.basicConfig(level=logging.INFO, stream=sys.stdout) @@ -47,6 +48,11 @@ def pytest_addoption(parser): parser.addoption("--arm_quantize_io", action="store_true", help="Deprecated.") parser.addoption("--arm_run_corstoneFVP", action="store_true") parser.addoption("--fast_fvp", action="store_true") + parser.addoption( + "--llama_inputs", + nargs="+", + help="List of two files. Firstly .pt file. Secondly .json", + ) def pytest_sessionstart(session): diff --git a/backends/arm/test/models/test_llama_arm.py b/backends/arm/test/models/test_llama_arm.py new file mode 100644 index 00000000000..75f95cde036 --- /dev/null +++ b/backends/arm/test/models/test_llama_arm.py @@ -0,0 +1,125 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import logging + +import os +import sys +import unittest + +import torch + +from executorch.backends.arm.test import common, conftest +from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.examples.models.llama.export_llama_lib import ( + build_args_parser, + get_llama_model, +) + +from executorch.exir import EdgeCompileConfig + +# Add project dir to sys path to workaround importlib.import_module() conditions in model_factory.py +this_files_dir = os.path.dirname(os.path.abspath(__file__)) +project_dir = os.path.abspath(os.path.join(this_files_dir, "../../../..")) +sys.path.append(project_dir) + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class TestLlama(unittest.TestCase): + """ + Test class of Llama models. Type of Llama model depends on command line parameters: + --llama_inputs + Example: --llama_inputs stories110M/stories110M.pt stories110M/params.json + """ + + _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig( + _check_ir_validity=False, + _skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend. + ) + + def prepare_model(self): + + checkpoint = None + params_file = None + if conftest.is_option_enabled("llama_inputs"): + param_list = conftest.get_option("llama_inputs") + assert ( + isinstance(param_list, list) and len(param_list) == 2 + ), "invalid number of inputs for --llama_inputs" + checkpoint = param_list[0] + params_file = param_list[1] + assert isinstance(checkpoint, str) and isinstance( + params_file, str + ), "invalid input for --llama_inputs" + else: + logging.warning( + "Skipping Llama test because of lack of input. To run use --llama_inputs <.pt> <.json>" + ) + return + + assert os.path.isfile(checkpoint) and os.path.isfile( + params_file + ), "Invalid file paths" + + # TODO: Enable key value cache + args = [ + "--disable_dynamic_shape", + "-c", + checkpoint, + "-p", + params_file, + "--model", + "stories110m", + ] + parser = build_args_parser() + args = parser.parse_args(args) + + llama_model, llama_inputs, llama_meta = get_llama_model(args) + + # TODO: Remove workaround since attention mask should not be persistent, + # it only works if input shape is always the same + freqs_c = "freqs_cos" + freqs_s = "freqs_sin" + for i in range(llama_model.n_layers): + val = llama_model.layers[i].attention.get_buffer("mask") + llama_model.layers[i].attention.register_buffer( + "mask", val, persistent=True + ) + val = llama_model.layers[i].attention.rope.get_buffer(freqs_c) + llama_model.layers[i].attention.rope.register_buffer( + freqs_c, val, persistent=True + ) + val = llama_model.layers[i].attention.rope.get_buffer(freqs_s) + llama_model.layers[i].attention.rope.register_buffer( + freqs_s, val, persistent=True + ) + + return llama_model, llama_inputs, llama_meta + + def test_llama_tosa_MI(self): + llama_model, llama_inputs, llama_meta = self.prepare_model() + + with torch.no_grad(): + ( + ArmTester( + llama_model, + example_inputs=llama_inputs, + compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), + constant_methods=llama_meta, + ) + .export() + .to_edge_transform_and_lower( + edge_compile_config=self._edge_compile_config + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 25}) + .to_executorch() + .run_method_and_compare_outputs( + inputs=llama_inputs, atol=1.8, rtol=0.01 + ) + ) diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py index b4b43f88c7a..3508ae305d1 100644 --- a/backends/arm/test/ops/test_add.py +++ b/backends/arm/test/ops/test_add.py @@ -5,7 +5,6 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. - from typing import Tuple import torch @@ -55,6 +54,17 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): } +class Add3(torch.nn.Module): + def forward(self, x: torch.Tensor, y: torch.Tensor): + return x + y + + test_data: list[input_t2] = { + "3d_randn_diff_rank": (torch.randn(1, 4, 5), torch.randn(4, 1)), + "4d_randn_diff_rank": (torch.randn(1, 1, 4, 4), torch.randn(4, 1)), + "4d_randn_diff_rank_2": (torch.randn(4, 1), torch.randn(1, 1, 4, 5)), + } + + @common.parametrize("test_data", Add.test_data) def test_add_tosa_MI(test_data: input_t1): pipeline = TosaPipelineMI[input_t1](Add(), test_data, aten_op, exir_op) @@ -107,6 +117,18 @@ def test_add2_tosa_MI(test_data: input_t2): pipeline.run() +@common.parametrize("test_data", Add3.test_data) +def test_add3_tosa_MI(test_data: input_t2): + pipeline = TosaPipelineMI[input_t2](Add3(), test_data, aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", Add3.test_data) +def test_add3_tosa_BI(test_data: input_t2): + pipeline = TosaPipelineBI[input_t2](Add3(), test_data, aten_op, exir_op) + pipeline.run() + + @common.parametrize("test_data", Add2.test_data) def test_add2_tosa_BI(test_data: input_t2): pipeline = TosaPipelineBI[input_t2](Add2(), test_data, aten_op, exir_op) diff --git a/backends/arm/test/ops/test_mul.py b/backends/arm/test/ops/test_mul.py index 715673b87c8..739864a4982 100644 --- a/backends/arm/test/ops/test_mul.py +++ b/backends/arm/test/ops/test_mul.py @@ -15,7 +15,7 @@ from executorch.exir.backend.backend_details import CompileSpec from parameterized import parameterized -test_data_sute = [ +test_data_suite = [ # (test_name, input, other,) See torch.mul() for info ( "op_mul_rank1_rand", @@ -55,6 +55,31 @@ ] +test_data_suite_2 = [ + # (test_name, input, other,) See torch.mul() for info + ( + "op_mul_rank2_rand", + torch.rand(4, 5), + torch.rand(5), + ), + ( + "op_mul_rank3_randn", + torch.randn(10, 5, 2), + torch.randn(5, 2), + ), + ( + "op_mul_rank4_randn", + torch.randn(1, 10, 25, 20), + torch.randn(1, 25, 20), + ), + ( + "op_mul_rank4_randn_2", + torch.randn(1, 25, 1), + torch.randn(1, 3, 25, 10), + ), +] + + class TestMul(unittest.TestCase): class Mul(torch.nn.Module): @@ -133,7 +158,7 @@ def _test_mul_ethosu_BI_pipeline( if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - @parameterized.expand(test_data_sute) + @parameterized.expand(test_data_suite) def test_mul_tosa_MI( self, test_name: str, @@ -143,7 +168,27 @@ def test_mul_tosa_MI( test_data = (input_, other_) self._test_mul_tosa_MI_pipeline(self.Mul(), test_data) - @parameterized.expand(test_data_sute) + @parameterized.expand(test_data_suite_2) + def test_mul_diff_input_ranks_tosa_MI( + self, + test_name: str, + input_: torch.Tensor, + other_: torch.Tensor, + ): + test_data = (input_, other_) + self._test_mul_tosa_MI_pipeline(self.Mul(), test_data) + + @parameterized.expand(test_data_suite_2) + def test_mul_diff_input_ranks_tosa_BI( + self, + test_name: str, + input_: torch.Tensor, + other_: torch.Tensor, + ): + test_data = (input_, other_) + self._test_mul_tosa_BI_pipeline(self.Mul(), test_data) + + @parameterized.expand(test_data_suite) def test_mul_tosa_BI( self, test_name: str, @@ -154,7 +199,7 @@ def test_mul_tosa_BI( test_data = (input_, other_) self._test_mul_tosa_BI_pipeline(self.Mul(), test_data) - @parameterized.expand(test_data_sute) + @parameterized.expand(test_data_suite) @pytest.mark.corstone_fvp def test_mul_u55_BI( self, @@ -167,7 +212,7 @@ def test_mul_u55_BI( common.get_u55_compile_spec(), self.Mul(), test_data ) - @parameterized.expand(test_data_sute) + @parameterized.expand(test_data_suite) @pytest.mark.corstone_fvp def test_mul_u85_BI( self, diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py index a6da2accd1d..3b75d23404b 100644 --- a/backends/arm/test/tester/arm_tester.py +++ b/backends/arm/test/tester/arm_tester.py @@ -3,12 +3,14 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import copy + import logging import os from collections import Counter from pprint import pformat -from typing import Callable, Iterable, List, Optional, Tuple, Type, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, Union import executorch.backends.xnnpack.test.tester.tester as tester @@ -48,11 +50,13 @@ from executorch.backends.xnnpack.test.tester import Tester from executorch.devtools.backend_debug import get_delegation_info + from executorch.exir import ( EdgeCompileConfig, EdgeProgramManager, ExecutorchProgramManager, ExportedProgram, + to_edge_transform_and_lower, ) from executorch.exir.backend.backend_api import validation_disabled from executorch.exir.backend.compile_spec_schema import CompileSpec @@ -62,6 +66,7 @@ from executorch.exir.program._program import _update_exported_program_graph_module from tabulate import tabulate + from torch.export.graph_signature import ExportGraphSignature, InputSpec, OutputSpec from torch.fx import Graph from torch.utils._pytree import tree_flatten @@ -122,10 +127,28 @@ def dump_artifact(self, path_to_dump: Optional[str]): class ToEdgeTransformAndLower(tester.ToEdgeTransformAndLower): + def __init__( + self, + partitioners: Optional[List[Partitioner]] = None, + edge_compile_config: Optional[EdgeCompileConfig] = None, + constant_methods: Optional[Dict[str, Any]] = None, + ): + super().__init__(partitioners, edge_compile_config) + self.constant_methods = constant_methods + def dump_artifact(self, path_to_dump: Optional[str]): super().dump_artifact(path_to_dump) _dump_lowered_modules_artifact(path_to_dump, self.artifact, self.graph_module) + def run(self, artifact: ExportedProgram, inputs=None) -> None: + artifact_to_run = copy.deepcopy(artifact) + self.edge_dialect_program = to_edge_transform_and_lower( + artifact_to_run, + compile_config=self.edge_compile_conf, + partitioner=self.partitioners, + constant_methods=self.constant_methods, + ) + class Serialize(tester.Serialize): def __init__(self, compile_spec: list[CompileSpec], timeout): @@ -236,6 +259,9 @@ def __init__( model: torch.nn.Module, example_inputs: Tuple, compile_spec: List[CompileSpec], + tosa_ref_model_path: str | None = None, + dynamic_shapes: Optional[Tuple[Any]] = None, + constant_methods: Optional[Dict[str, Any]] = None, ): """ Args: @@ -244,8 +270,9 @@ def __init__( compile_spec (List[CompileSpec]): The compile spec to use """ + self.constant_methods = constant_methods self.compile_spec = compile_spec - super().__init__(model, example_inputs) + super().__init__(model, example_inputs, dynamic_shapes) self.pipeline[self.stage_name(InitialModel)] = [ self.stage_name(tester.Quantize), self.stage_name(tester.Export), @@ -310,7 +337,9 @@ def to_edge_transform_and_lower( raise ValueError("compile spec doesn't target any Arm Partitioner") partitioners = [arm_partitioner] to_edge_and_lower_stage = ToEdgeTransformAndLower( - partitioners, edge_compile_config + partitioners, + edge_compile_config, + constant_methods=self.constant_methods, ) else: if partitioners is not None: diff --git a/backends/arm/tosa_utils.py b/backends/arm/tosa_utils.py index 15d29b57482..14ddedd5e10 100644 --- a/backends/arm/tosa_utils.py +++ b/backends/arm/tosa_utils.py @@ -89,6 +89,45 @@ def build_reshape(tosa_fb, input_name, new_shape, output_name): tosa_fb.addOperator(TosaOp.Op().RESHAPE, [input_name], [output_name], attr) +def reshape_for_broadcast(tosa_fb, inputs, dim_order=None): + assert len(inputs) == 2 + input1 = inputs[0] + input2 = inputs[1] + + def get_new_shape(l_rank_in, h_rank_in): + rank_diff = len(h_rank_in.shape) - len(l_rank_in.shape) + new_shape = list(l_rank_in.shape) + + for _ in range(rank_diff): + new_shape.insert(0, 1) + return tuple(new_shape) + + if len(input1.shape) == len(input2.shape): + return input1, input2 + elif len(input1.shape) > len(input2.shape): + l_rank_in = input2 + h_rank_in = input1 + elif len(input1.shape) < len(input2.shape): + l_rank_in = input1 + h_rank_in = input2 + + new_shape = get_new_shape(l_rank_in, h_rank_in) + dim_order = h_rank_in.dim_order if dim_order is None else dim_order + new_shape = tosa_shape(new_shape, dim_order) + + reshaped = tosa_fb.addIntermediate( + new_shape, + inputs[0].dtype, + ) + + build_reshape(tosa_fb, l_rank_in.name, new_shape, reshaped.name) + + if len(input1.shape) > len(input2.shape): + return input1, reshaped + else: + return reshaped, input2 + + def is_consumer_node_depthwise_conv2d(node): consumer_node = list(node.users)[0] if consumer_node.target == exir_ops.edge.aten.convolution.default: diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 4ad92903534..276cf05001f 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -1,5 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -1148,3 +1149,14 @@ def _get_source_transforms( # noqa transforms.append(replace_with_vulkan_rotary_emb) return transforms + + +def get_llama_model(args): + _validate_args(args) + e_mgr = _prepare_for_llama_export(args) + model = ( + e_mgr.model.eval().to(device="cuda") # pyre-ignore + if torch.cuda.is_available() + else e_mgr.model.eval().to(device="cpu") + ) + return model, e_mgr.example_inputs, e_mgr.metadata From e7c117bcde785459d9d2e186eb67fda26b1befa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Tue, 25 Feb 2025 19:00:21 +0100 Subject: [PATCH 2/6] Reduce number of delegates in test_llama_arm.py --- backends/arm/test/models/test_llama_arm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/arm/test/models/test_llama_arm.py b/backends/arm/test/models/test_llama_arm.py index 75f95cde036..365ae4580dd 100644 --- a/backends/arm/test/models/test_llama_arm.py +++ b/backends/arm/test/models/test_llama_arm.py @@ -117,7 +117,7 @@ def test_llama_tosa_MI(self): .to_edge_transform_and_lower( edge_compile_config=self._edge_compile_config ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 25}) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 14}) .to_executorch() .run_method_and_compare_outputs( inputs=llama_inputs, atol=1.8, rtol=0.01 From 203cea5b8d2a334ded444a65427d3f805f9a3fa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Wed, 26 Feb 2025 11:32:05 +0100 Subject: [PATCH 3/6] Call llama3_2_vision/install_requirements.sh during Arm backend setup --- .ci/scripts/setup-arm-baremetal-tools.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.ci/scripts/setup-arm-baremetal-tools.sh b/.ci/scripts/setup-arm-baremetal-tools.sh index 454b9f336e9..7404b389e7d 100755 --- a/.ci/scripts/setup-arm-baremetal-tools.sh +++ b/.ci/scripts/setup-arm-baremetal-tools.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2024 Arm Limited and/or its affiliates. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -9,3 +9,5 @@ git config --global user.email "github_executorch@arm.com" git config --global user.name "Github Executorch" bash examples/arm/setup.sh --i-agree-to-the-contained-eula + +./examples/models/llama3_2_vision/install_requirements.sh From afbf5e40251868613b1c424591d0657f1a910fc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Wed, 26 Feb 2025 11:35:29 +0100 Subject: [PATCH 4/6] Rename test_llama_arm.py to test_llama.py --- backends/arm/test/models/{test_llama_arm.py => test_llama.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backends/arm/test/models/{test_llama_arm.py => test_llama.py} (100%) diff --git a/backends/arm/test/models/test_llama_arm.py b/backends/arm/test/models/test_llama.py similarity index 100% rename from backends/arm/test/models/test_llama_arm.py rename to backends/arm/test/models/test_llama.py From 25dfa1163abdb7e851648a5dcfe9ac3ccc5649dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Wed, 26 Feb 2025 14:10:57 +0100 Subject: [PATCH 5/6] Skip test_llama_tosa_MI if no input parameters are given --- backends/arm/test/models/test_llama.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py index 365ae4580dd..3a827fc73b2 100644 --- a/backends/arm/test/models/test_llama.py +++ b/backends/arm/test/models/test_llama.py @@ -61,7 +61,7 @@ def prepare_model(self): logging.warning( "Skipping Llama test because of lack of input. To run use --llama_inputs <.pt> <.json>" ) - return + return None, None, None assert os.path.isfile(checkpoint) and os.path.isfile( params_file @@ -105,6 +105,9 @@ def prepare_model(self): def test_llama_tosa_MI(self): llama_model, llama_inputs, llama_meta = self.prepare_model() + if llama_model is None and llama_inputs is None and llama_meta is None: + return + with torch.no_grad(): ( ArmTester( From ec90735535546c0e5210c1d79f0ab5a04235094b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Thu, 27 Feb 2025 10:20:37 +0100 Subject: [PATCH 6/6] Update review comments --- .ci/scripts/setup-arm-baremetal-tools.sh | 4 +--- backends/arm/operators/op_slice.py | 3 ++- backends/arm/test/models/test_llama.py | 12 ++---------- backends/arm/test/test_arm_baremetal.sh | 8 ++++++-- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/.ci/scripts/setup-arm-baremetal-tools.sh b/.ci/scripts/setup-arm-baremetal-tools.sh index 7404b389e7d..454b9f336e9 100755 --- a/.ci/scripts/setup-arm-baremetal-tools.sh +++ b/.ci/scripts/setup-arm-baremetal-tools.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2024-2025 Arm Limited and/or its affiliates. +# Copyright 2024 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -9,5 +9,3 @@ git config --global user.email "github_executorch@arm.com" git config --global user.name "Github Executorch" bash examples/arm/setup.sh --i-agree-to-the-contained-eula - -./examples/models/llama3_2_vision/install_requirements.sh diff --git a/backends/arm/operators/op_slice.py b/backends/arm/operators/op_slice.py index d2223eb670b..a3ce80c5b24 100644 --- a/backends/arm/operators/op_slice.py +++ b/backends/arm/operators/op_slice.py @@ -33,7 +33,8 @@ def define_node( ) -> None: # See slice_copy_support.py - assert len(inputs) == 4 or (len(inputs) == 5 and inputs[4].number == 1) + if not (len(inputs) == 4 or (len(inputs) == 5 and inputs[4].number == 1)): + raise ValueError("Unsupported combination of inputs") # aten.slice_copy supports slicing in 1d at a time. # The arguments are the actual input, dimension of slicing, start index, end index and optinal step or stride. diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py index 3a827fc73b2..973f62d2724 100644 --- a/backends/arm/test/models/test_llama.py +++ b/backends/arm/test/models/test_llama.py @@ -20,7 +20,6 @@ get_llama_model, ) -from executorch.exir import EdgeCompileConfig # Add project dir to sys path to workaround importlib.import_module() conditions in model_factory.py this_files_dir = os.path.dirname(os.path.abspath(__file__)) @@ -38,11 +37,6 @@ class TestLlama(unittest.TestCase): Example: --llama_inputs stories110M/stories110M.pt stories110M/params.json """ - _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig( - _check_ir_validity=False, - _skip_dim_order=True, # TODO(T182928844): Delegate dim order op to backend. - ) - def prepare_model(self): checkpoint = None @@ -117,12 +111,10 @@ def test_llama_tosa_MI(self): constant_methods=llama_meta, ) .export() - .to_edge_transform_and_lower( - edge_compile_config=self._edge_compile_config - ) + .to_edge_transform_and_lower() .check_count({"torch.ops.higher_order.executorch_call_delegate": 14}) .to_executorch() .run_method_and_compare_outputs( - inputs=llama_inputs, atol=1.8, rtol=0.01 + inputs=llama_inputs, atol=1.8, rtol=0.01 # TODO: decrease tolerance ) ) diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index 90b34241f3d..0720843de2e 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -37,7 +37,7 @@ fi TEST_SUITE_NAME="$(basename "$0") ${TEST_SUITE}" all() { # Run all tests - # This will list all lines in this file that is starting with test_ remove () { and add this script name in + # This will list all lines in this file that is starting with test_ remove () { and add this script name in # front of it and execute it in a sub shell # e.g. from this file: # @@ -62,6 +62,9 @@ all() { # Run all tests test_pytest() { # Test ops and other things echo "${TEST_SUITE_NAME}: Run pytest" + + ./examples/models/llama3_2_vision/install_requirements.sh + cd "${et_root_dir}" source examples/arm/ethos-u-scratch/setup_path.sh backends/arm/scripts/build_quantized_ops_aot_lib.sh @@ -74,6 +77,7 @@ test_pytest() { # Test ops and other things test_pytest_ethosu_fvp() { # Same as test_pytest but also sometime verify using Corstone FVP echo "${TEST_SUITE_NAME}: Run pytest with fvp" + ./examples/models/llama3_2_vision/install_requirements.sh source examples/arm/ethos-u-scratch/setup_path.sh # Prepare Corstone-3x0 FVP for pytest @@ -107,7 +111,7 @@ test_run_ethosu_fvp() { # End to End model tests using run.sh echo "${TEST_SUITE_NAME}: PASS" } -test_models_ethosu_fvp() { # End to End model tests using model_test.py +test_models_ethosu_fvp() { # End to End model tests using model_test.py echo "${TEST_SUITE_NAME}: Test ethos-u delegate models with test_model.py" source examples/arm/ethos-u-scratch/setup_path.sh