Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions backends/arm/arm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(self):
self.permute_nhwc = False
self.quantize_io = False
self.tosa_version = None
self.input_order = None

def ethosu_compile_spec(
self,
Expand Down Expand Up @@ -134,6 +135,14 @@ def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
self.quantize_io = quantize_io
return self

def set_input_order(self, input_order: str = None) -> "ArmCompileSpecBuilder":
    """
    Record the order in which the model inputs should be arranged.

    This may be needed for models with more than one input that are lowered
    with the U55/U85 compile spec. The value is stored exactly as given;
    build() only emits an "input_order" compile spec when it is truthy, so
    the default of None leaves the input order untouched.

    Returns the builder itself so that setter calls can be chained.
    """
    # NOTE(review): build() joins the items of this value with spaces, while
    # preprocess() splits the decoded spec on "," - presumably a
    # comma-separated string such as "1,0" is expected here; confirm.
    self.input_order = input_order
    return self

def build(self) -> List[CompileSpec]:
"""
Generate a list of compile spec objects from the builder
Expand Down Expand Up @@ -163,6 +172,13 @@ def build(self) -> List[CompileSpec]:
CompileSpec("permute_memory_format", "nhwc".encode())
)

if self.input_order:
self.compile_spec.append(
CompileSpec(
"input_order", " ".join(map(str, self.input_order)).encode()
)
)

if self.quantize_io:
self.compile_spec.append(CompileSpec("quantize_io", "True".encode()))

Expand Down Expand Up @@ -214,13 +230,16 @@ def preprocess( # noqa: C901
artifact_path = None
output_format = ""
compile_flags = []
input_order = []
for spec in compile_spec:
if spec.key == "debug_artifact_path":
artifact_path = spec.value.decode()
if spec.key == "output_format":
output_format = spec.value.decode()
if spec.key == "compile_flags":
compile_flags.append(spec.value.decode())
if spec.key == "input_order":
input_order = list(map(int, spec.value.decode().split(",")))

# Check that the output format is set in the compile spec
if not output_format:
Expand All @@ -246,19 +265,27 @@ def preprocess( # noqa: C901
)

node_visitors = get_node_visitors(edge_program, tosa_spec)

input_count = 0
for node in graph_module.graph.nodes:
if node.op == "call_function":
process_call_function(node, tosa_graph, node_visitors, tosa_spec)
elif node.op == "placeholder":
process_placeholder(node, tosa_graph, edge_program, tosa_spec)
if node.name in edge_program.graph_signature.user_inputs:
input_count += 1
elif node.op == "output":
process_output(node, tosa_graph)
else:
# This will only happen if an unpartitioned graph is passed without
# any checking of compatibility.
dbg_fail(node, tosa_graph, artifact_path)

if len(input_order) > 0:
if input_count != len(input_order):
raise RuntimeError(
"The rank of the input order is not equal to amount of input tensors"
)

# TODO: It would be awesome if this dump could somehow be done on top level and not here.
# Problem is that the desc.json has to be created on the tosa_graph object, which we can't
# access from top level.
Expand All @@ -275,7 +302,7 @@ def preprocess( # noqa: C901
# preprocess and some consume TOSA fb directly.
if output_format == "vela":
# Emit vela_bin_stream format
binary = vela_compile(tosa_graph, compile_flags)
binary = vela_compile(tosa_graph, compile_flags, input_order)
elif output_format == "tosa":
# Emit TOSA flatbuffer
binary = bytes(tosa_graph.serialize())
Expand Down
15 changes: 9 additions & 6 deletions backends/arm/arm_vela.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@

# Pack either input or output tensor block, compose the related arrays into
# per-io structs to simplify runtime use.
def vela_bin_pack_io(prefix, data):
ios = struct.pack("<i", len(data[prefix + "_shape"]))
for i in range(len(data[prefix + "_shape"])):
io_shape = data[prefix + "_shape"][i]
def vela_bin_pack_io(prefix, data, shape_order=None):
vela_input_shapes = data[prefix + "_shape"]

order = shape_order if shape_order else range(len(vela_input_shapes))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: `order = shape_order or range(len(vela_input_shapes))` would be more idiomatic.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the review, and I agree.
I will fix this in another commit.

Thanks,
Saoirse

ios = struct.pack("<i", len(vela_input_shapes))
for i in order:
io_shape = vela_input_shapes[i]
io_elem_size = data[prefix + "_elem_size"][i]
io_offset = data[prefix + "_offset"][i]
io_region = data[prefix + "_region"][i]
Expand All @@ -36,7 +39,7 @@ def vela_bin_pack_io(prefix, data):
# Output via Vela to binary stream for ArmBackendEthosU
# WARNING: Do not change this without changing VelaBinStream.cpp as that
# function consumes this format and the two need to align.
def vela_compile(tosa_graph, args: List[str]):
def vela_compile(tosa_graph, args: List[str], shape_order=None):
with tempfile.TemporaryDirectory() as tmpdir:
tosaname = "out.tosa"
flatbuffer = tosa_graph.serialize()
Expand Down Expand Up @@ -78,7 +81,7 @@ def vela_compile(tosa_graph, args: List[str]):
bin_blocks["scratch_data"] = b"\x00" * block_length

# Capture inputs and outputs
bin_blocks["inputs"] = vela_bin_pack_io("input", data)
bin_blocks["inputs"] = vela_bin_pack_io("input", data, shape_order)
bin_blocks["outputs"] = vela_bin_pack_io("output", data)

bin_blocks["vela_end_stream"] = b""
Expand Down
32 changes: 26 additions & 6 deletions backends/arm/test/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,29 +213,44 @@ def get_tosa_compile_spec_unbuilt(


def get_u55_compile_spec(
permute_memory_to_nhwc=True, quantize_io=False, custom_path=None
permute_memory_to_nhwc=True,
quantize_io=False,
custom_path=None,
reorder_inputs=None,
) -> list[CompileSpec]:
"""
Default compile spec for Ethos-U55 tests.
"""
return get_u55_compile_spec_unbuilt(
permute_memory_to_nhwc, quantize_io=quantize_io, custom_path=custom_path
permute_memory_to_nhwc,
quantize_io=quantize_io,
custom_path=custom_path,
reorder_inputs=reorder_inputs,
).build()


def get_u85_compile_spec(
permute_memory_to_nhwc=True, quantize_io=False, custom_path=None
permute_memory_to_nhwc=True,
quantize_io=False,
custom_path=None,
reorder_inputs=None,
) -> list[CompileSpec]:
"""
Default compile spec for Ethos-U85 tests.
"""
return get_u85_compile_spec_unbuilt(
permute_memory_to_nhwc, quantize_io=quantize_io, custom_path=custom_path
permute_memory_to_nhwc,
quantize_io=quantize_io,
custom_path=custom_path,
reorder_inputs=reorder_inputs,
).build()


def get_u55_compile_spec_unbuilt(
permute_memory_to_nhwc=True, quantize_io=False, custom_path=None
permute_memory_to_nhwc=True,
quantize_io=False,
custom_path=None,
reorder_inputs=None,
) -> ArmCompileSpecBuilder:
"""Get the ArmCompileSpecBuilder for the Ethos-U55 tests, to modify
the compile spec before calling .build() to finalize it.
Expand All @@ -254,12 +269,16 @@ def get_u55_compile_spec_unbuilt(
.set_quantize_io(is_option_enabled("quantize_io") or quantize_io)
.set_permute_memory_format(permute_memory_to_nhwc)
.dump_intermediate_artifacts_to(artifact_path)
.set_input_order(reorder_inputs)
)
return compile_spec


def get_u85_compile_spec_unbuilt(
permute_memory_to_nhwc=True, quantize_io=False, custom_path=None
permute_memory_to_nhwc=True,
quantize_io=False,
custom_path=None,
reorder_inputs=None,
) -> list[CompileSpec]:
"""Get the ArmCompileSpecBuilder for the Ethos-U85 tests, to modify
the compile spec before calling .build() to finalize it.
Expand All @@ -276,6 +295,7 @@ def get_u85_compile_spec_unbuilt(
.set_quantize_io(is_option_enabled("quantize_io") or quantize_io)
.set_permute_memory_format(permute_memory_to_nhwc)
.dump_intermediate_artifacts_to(artifact_path)
.set_input_order(reorder_inputs)
)
return compile_spec

Expand Down
18 changes: 16 additions & 2 deletions examples/arm/aot_arm_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,9 @@ def get_calibration_data(


def get_compile_spec(
target: str, intermediates: Optional[str] = None
target: str,
intermediates: Optional[str] = None,
reorder_inputs: Optional[str] = None,
) -> ArmCompileSpecBuilder:
spec_builder = None
if target == "TOSA":
Expand All @@ -265,6 +267,7 @@ def get_compile_spec(
)
.set_permute_memory_format(True)
.set_quantize_io(True)
.set_input_order(reorder_inputs)
)
elif "ethos-u85" in target:
spec_builder = (
Expand All @@ -277,6 +280,7 @@ def get_compile_spec(
)
.set_permute_memory_format(True)
.set_quantize_io(True)
.set_input_order(reorder_inputs)
)

if intermediates is not None:
Expand Down Expand Up @@ -419,6 +423,14 @@ def get_args():
required=False,
help="Location for outputs, if not the default of cwd.",
)
parser.add_argument(
"-r",
"--reorder_inputs",
type=str,
required=False,
default=None,
help="Provide the order of the inputs. This can be required when inputs > 1.",
)
args = parser.parse_args()

if args.evaluate and (
Expand Down Expand Up @@ -481,7 +493,9 @@ def get_args():
if args.delegate:
# As we can target multiple output encodings from ArmBackend, one must
# be specified.
compile_spec = get_compile_spec(args.target, args.intermediates)
compile_spec = get_compile_spec(
args.target, args.intermediates, args.reorder_inputs
)
edge = to_edge_transform_and_lower(
exported_program,
partitioner=[ArmPartitioner(compile_spec)],
Expand Down
6 changes: 5 additions & 1 deletion examples/arm/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
root_dir=${script_dir}/ethos-u-scratch

model_name=""
reorder_inputs=""
aot_arm_compiler_flags="--delegate --quantize"
target="ethos-u55-128"
output_folder_set=false
Expand All @@ -37,6 +38,7 @@ help() {
echo " --output=<FOLDER> Output folder Default: ${output_folder}"
echo " --build_only Only build, don't run FVP"
echo " --scratch-dir=<FOLDER> Path to your Ethos-U scrach dir if you not using default"
echo " --reorder_inputs=<FLAGS> Reorder the inputs. This can be required when inputs > 1."
exit 0
}

Expand All @@ -50,6 +52,7 @@ for arg in "$@"; do
--output=*) output_folder="${arg#*=}" ; output_folder_set=true ;;
--build_only) build_only=true ;;
--scratch-dir=*) root_dir="${arg#*=}";;
--reorder_inputs=*) reorder_inputs="${arg#*=}";;
*)
;;
esac
Expand Down Expand Up @@ -112,7 +115,7 @@ function generate_pte_file() {
# We are using the aot_lib from build_quantization_aot_lib below
SO_LIB=$(find cmake-out-aot-lib -name libquantized_ops_aot_lib.${SO_EXT})

python3 -m examples.arm.aot_arm_compiler --model_name="${model}" --target=${target} ${model_compiler_flags} --output ${output_folder} --so_library="$SO_LIB" 1>&2
python3 -m examples.arm.aot_arm_compiler --model_name="${model}" --target=${target} ${model_compiler_flags} --reorder_inputs=${reorder_inputs} --output ${output_folder} --so_library="$SO_LIB" 1>&2
[[ -f ${pte_file} ]] || { >&2 echo "Failed to generate a pte file - ${pte_file}"; exit 1; }
echo "${pte_file}"
}
Expand Down Expand Up @@ -287,6 +290,7 @@ if [[ -z "$model_name" ]]; then
else
test_model=( "$model_name" )
model_compiler_flags=( "$aot_arm_compiler_flags" )
reorder_inputs=( "$reorder_inputs" )
fi

# loop over running the AoT flow and executing the model on device
Expand Down
Loading