From a863305fa483cddbe12299186e502ca6c3eba8cf Mon Sep 17 00:00:00 2001 From: Saoirse Stewart Date: Tue, 12 Nov 2024 15:26:34 +0000 Subject: [PATCH] Add aot_arm_compiler flag to allow the reordering of the inputs * Add capability to use cmd input order in the backend * Extend the test infrastructure to handle this --- backends/arm/arm_backend.py | 31 +++++++++++++++++++++++++++++-- backends/arm/arm_vela.py | 15 +++++++++------ backends/arm/test/common.py | 32 ++++++++++++++++++++++++++------ examples/arm/aot_arm_compiler.py | 18 ++++++++++++++++-- examples/arm/run.sh | 6 +++++- 5 files changed, 85 insertions(+), 17 deletions(-) diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py index 06207611e09..ad2d1e73afb 100644 --- a/backends/arm/arm_backend.py +++ b/backends/arm/arm_backend.py @@ -52,6 +52,7 @@ def __init__(self): self.permute_nhwc = False self.quantize_io = False self.tosa_version = None + self.input_order = None def ethosu_compile_spec( self, @@ -134,6 +135,14 @@ def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder": self.quantize_io = quantize_io return self + def set_input_order(self, input_order: str = None) -> "ArmCompileSpecBuilder": + """ + Reorder the inputs coming in. This may be required when inputs > 1. + And while using the U55/U85 CompileSpec. + """ + self.input_order = input_order + return self + def build(self) -> List[CompileSpec]: """ Generate a list of compile spec objects from the builder @@ -163,6 +172,13 @@ def build(self) -> List[CompileSpec]: CompileSpec("permute_memory_format", "nhwc".encode()) ) + if self.input_order: + self.compile_spec.append( + CompileSpec( + "input_order", " ".join(map(str, self.input_order)).encode() + ) + ) + if self.quantize_io: self.compile_spec.append(CompileSpec("quantize_io", "True".encode())) @@ -214,6 +230,7 @@ def preprocess( # noqa: C901 artifact_path = None output_format = "" compile_flags = [] + input_order = [] for spec in compile_spec: if spec.key == "debug_artifact_path": artifact_path = spec.value.decode() @@ -221,6 +238,8 @@ def preprocess( # noqa: C901 output_format = spec.value.decode() if spec.key == "compile_flags": compile_flags.append(spec.value.decode()) + if spec.key == "input_order": + input_order = list(map(int, spec.value.decode().split(","))) # Check that the output format is set in the compile spec if not output_format: @@ -246,12 +265,14 @@ def preprocess( # noqa: C901 ) node_visitors = get_node_visitors(edge_program, tosa_spec) - + input_count = 0 for node in graph_module.graph.nodes: if node.op == "call_function": process_call_function(node, tosa_graph, node_visitors, tosa_spec) elif node.op == "placeholder": process_placeholder(node, tosa_graph, edge_program, tosa_spec) + if node.name in edge_program.graph_signature.user_inputs: + input_count += 1 elif node.op == "output": process_output(node, tosa_graph) else: @@ -259,6 +280,12 @@ def preprocess( # noqa: C901 # any checking of compatibility. dbg_fail(node, tosa_graph, artifact_path) + if len(input_order) > 0: + if input_count != len(input_order): + raise RuntimeError( + "The rank of the input order is not equal to amount of input tensors" + ) + # TODO: It would be awesome if this dump could somehow be done on top level and not here. # Problem is that the desc.json has to be created on the tosa_graph object, which we can't # access from top level. @@ -275,7 +302,7 @@ def preprocess( # noqa: C901 # preprocess and some consume TOSA fb directly. if output_format == "vela": # Emit vela_bin_stream format - binary = vela_compile(tosa_graph, compile_flags) + binary = vela_compile(tosa_graph, compile_flags, input_order) elif output_format == "tosa": # Emit TOSA flatbuffer binary = bytes(tosa_graph.serialize()) diff --git a/backends/arm/arm_vela.py b/backends/arm/arm_vela.py index 01bb8bd55e5..918d95ba379 100644 --- a/backends/arm/arm_vela.py +++ b/backends/arm/arm_vela.py @@ -17,10 +17,13 @@ # Pack either input or output tensor block, compose the related arrays into # per-io structs to simplify runtime use. -def vela_bin_pack_io(prefix, data): - ios = struct.pack(" list[CompileSpec]: """ Default compile spec for Ethos-U55 tests. """ return get_u55_compile_spec_unbuilt( - permute_memory_to_nhwc, quantize_io=quantize_io, custom_path=custom_path + permute_memory_to_nhwc, + quantize_io=quantize_io, + custom_path=custom_path, + reorder_inputs=reorder_inputs, ).build() def get_u85_compile_spec( - permute_memory_to_nhwc=True, quantize_io=False, custom_path=None + permute_memory_to_nhwc=True, + quantize_io=False, + custom_path=None, + reorder_inputs=None, ) -> list[CompileSpec]: """ Default compile spec for Ethos-U85 tests. """ return get_u85_compile_spec_unbuilt( - permute_memory_to_nhwc, quantize_io=quantize_io, custom_path=custom_path + permute_memory_to_nhwc, + quantize_io=quantize_io, + custom_path=custom_path, + reorder_inputs=reorder_inputs, ).build() def get_u55_compile_spec_unbuilt( - permute_memory_to_nhwc=True, quantize_io=False, custom_path=None + permute_memory_to_nhwc=True, + quantize_io=False, + custom_path=None, + reorder_inputs=None, ) -> ArmCompileSpecBuilder: """Get the ArmCompileSpecBuilder for the Ethos-U55 tests, to modify the compile spec before calling .build() to finalize it. @@ -254,12 +269,16 @@ def get_u55_compile_spec_unbuilt( .set_quantize_io(is_option_enabled("quantize_io") or quantize_io) .set_permute_memory_format(permute_memory_to_nhwc) .dump_intermediate_artifacts_to(artifact_path) + .set_input_order(reorder_inputs) ) return compile_spec def get_u85_compile_spec_unbuilt( - permute_memory_to_nhwc=True, quantize_io=False, custom_path=None + permute_memory_to_nhwc=True, + quantize_io=False, + custom_path=None, + reorder_inputs=None, ) -> list[CompileSpec]: """Get the ArmCompileSpecBuilder for the Ethos-U85 tests, to modify the compile spec before calling .build() to finalize it. @@ -276,6 +295,7 @@ def get_u85_compile_spec_unbuilt( .set_quantize_io(is_option_enabled("quantize_io") or quantize_io) .set_permute_memory_format(permute_memory_to_nhwc) .dump_intermediate_artifacts_to(artifact_path) + .set_input_order(reorder_inputs) ) return compile_spec diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index 4953f8735e3..ddd5fd6b0bb 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -245,7 +245,9 @@ def get_calibration_data( def get_compile_spec( - target: str, intermediates: Optional[str] = None + target: str, + intermediates: Optional[str] = None, + reorder_inputs: Optional[str] = None, ) -> ArmCompileSpecBuilder: spec_builder = None if target == "TOSA": @@ -265,6 +267,7 @@ def get_compile_spec( ) .set_permute_memory_format(True) .set_quantize_io(True) + .set_input_order(reorder_inputs) ) elif "ethos-u85" in target: spec_builder = ( @@ -277,6 +280,7 @@ def get_compile_spec( ) .set_permute_memory_format(True) .set_quantize_io(True) + .set_input_order(reorder_inputs) ) if intermediates is not None: @@ -419,6 +423,14 @@ def get_args(): required=False, help="Location for outputs, if not the default of cwd.", ) + parser.add_argument( + "-r", + "--reorder_inputs", + type=str, + required=False, + default=None, + help="Provide the order of the inputs. This can be required when inputs > 1.", + ) args = parser.parse_args() if args.evaluate and ( @@ -481,7 +493,9 @@ def get_args(): if args.delegate: # As we can target multiple output encodings from ArmBackend, one must # be specified. - compile_spec = get_compile_spec(args.target, args.intermediates) + compile_spec = get_compile_spec( + args.target, args.intermediates, args.reorder_inputs + ) edge = to_edge_transform_and_lower( exported_program, partitioner=[ArmPartitioner(compile_spec)], diff --git a/examples/arm/run.sh b/examples/arm/run.sh index e4941519449..6de8ec2d2b7 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -20,6 +20,7 @@ script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) root_dir=${script_dir}/ethos-u-scratch model_name="" +reorder_inputs="" aot_arm_compiler_flags="--delegate --quantize" target="ethos-u55-128" output_folder_set=false @@ -37,6 +38,7 @@ help() { echo " --output= Output folder Default: ${output_folder}" echo " --build_only Only build, don't run FVP" echo " --scratch-dir= Path to your Ethos-U scrach dir if you not using default" + echo " --reorder_inputs= Reorder the inputs. This can be required when inputs > 1." exit 0 } @@ -50,6 +52,7 @@ for arg in "$@"; do --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;; --build_only) build_only=true ;; --scratch-dir=*) root_dir="${arg#*=}";; + --reorder_inputs=*) reorder_inputs="${arg#*=}";; *) ;; esac @@ -112,7 +115,7 @@ function generate_pte_file() { # We are using the aot_lib from build_quantization_aot_lib below SO_LIB=$(find cmake-out-aot-lib -name libquantized_ops_aot_lib.${SO_EXT}) - python3 -m examples.arm.aot_arm_compiler --model_name="${model}" --target=${target} ${model_compiler_flags} --output ${output_folder} --so_library="$SO_LIB" 1>&2 + python3 -m examples.arm.aot_arm_compiler --model_name="${model}" --target=${target} ${model_compiler_flags} --reorder_inputs=${reorder_inputs} --output ${output_folder} --so_library="$SO_LIB" 1>&2 [[ -f ${pte_file} ]] || { >&2 echo "Failed to generate a pte file - ${pte_file}"; exit 1; } echo "${pte_file}" } @@ -287,6 +290,7 @@ if [[ -z "$model_name" ]]; then else test_model=( "$model_name" ) model_compiler_flags=( "$aot_arm_compiler_flags" ) + reorder_inputs=( "$reorder_inputs" ) fi # loop over running the AoT flow and executing the model on device