
Commit 73f2327

Arm backend: Update VelaIO handling
VelaIO is always 6D.
- Update AOT handling of metadata from Vela.
- Add a unit test to trigger 5D cases.
- Update EthosUBackend to read IO as 6D arrays.

Signed-off-by: Oscar Andersson <[email protected]>
Change-Id: I8d7d3a44ac84e5bb14fa27e7b7765c3b7a8ee483
1 parent 7535720 commit 73f2327
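In short, the AOT flow no longer zero-pads IO shapes to 4D; Vela now always emits exactly six dims per input/output. A hypothetical before/after illustration (values invented for clarity):

    # Before this commit: up to 4 dims, zero-padded to length 4 by the AOT packer.
    old_io_shape = [1, 16, 16, 3]        # NHWC, at most 4D

    # After: Vela always reports 6 dims; leading unit dims carry extra batch axes.
    new_io_shape = [1, 1, 1, 16, 16, 3]  # 6D, with H/W/C in the trailing positions
    assert len(new_io_shape) == 6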

8 files changed (+100, -29 lines)


backends/arm/arm_vela.py
Lines changed: 5 additions & 3 deletions
@@ -25,17 +25,19 @@
 # per-io structs to simplify runtime use.
 def vela_bin_pack_io(prefix, data):
     vela_input_shapes = data[prefix + "_shape"]
+    # Vela input/output shape is fixed to 6D
+    vela_io_shape_dims = 6
 
     ios = struct.pack("<i", len(vela_input_shapes))
     for i in range(len(vela_input_shapes)):
         io_shape = vela_input_shapes[i]
         io_elem_size = data[prefix + "_elem_size"][i]
         io_offset = data[prefix + "_offset"][i]
         io_region = data[prefix + "_region"][i]
-        assert len(io_shape) <= 4
-        inp_pad = io_shape.tolist() + [0] * (4 - len(io_shape))
+        assert len(io_shape) == vela_io_shape_dims
+        inp_pad = io_shape.tolist()
         io_struct = struct.pack(
-            "<iiiiiii", *inp_pad, io_elem_size, io_offset, io_region
+            "<iiiiiiiii", *inp_pad, io_elem_size, io_offset, io_region
         )
         ios += io_struct
     return ios
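
For reference, each IO record written above is nine little-endian int32 values (six shape dims, then elem_size, offset, region), preceded by one int32 count for the stream. A minimal round-trip sketch with invented values:

    import struct

    shape = [1, 1, 1, 16, 16, 3]   # hypothetical 6D Vela IO shape
    elem_size, offset, region = 1, 0, 0

    blob = struct.pack("<i", 1)    # number of IOs
    blob += struct.pack("<iiiiiiiii", *shape, elem_size, offset, region)

    count = struct.unpack_from("<i", blob, 0)[0]
    fields = struct.unpack_from("<iiiiiiiii", blob, 4)
    assert count == 1 and list(fields[:6]) == shape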

backends/arm/runtime/EthosUBackend.cpp
Lines changed: 42 additions & 16 deletions
@@ -277,12 +277,11 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
           event_tracer,
           "+EthosUBackend::execute()handles.input.permute_CHW_to_HWC()");
       // permuted byte copy CHW to HWC
+      int c, h, w;
+      ET_CHECK_OK_OR_RETURN_ERROR(get_chw(tensor_in, &c, &h, &w));
+
       permute_CHW_to_HWC(
-          tensor_in.mutable_data_ptr<char>(),
-          scratch_addr,
-          tensor_in.size(1),
-          tensor_in.size(2),
-          tensor_in.size(3));
+          tensor_in.mutable_data_ptr<char>(), scratch_addr, c, h, w);
     } else if (both_char || both_int || both_short || both_bool) {
       EXECUTORCH_PROF_SCOPE(
           event_tracer, "+EthosUBackend::execute()handles.input.memcpy()");
@@ -379,13 +378,11 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
           "+EthosUBackend::execute()handles.output.permute_HWC_to_CHW()");
 
       const char* output_address = static_cast<const char*>(output_addr);
+      int c, h, w;
+      ET_CHECK_OK_OR_RETURN_ERROR(get_chw(tensor_out, &c, &h, &w));
 
       permute_HWC_to_CHW(
-          output_address,
-          tensor_out.mutable_data_ptr<char>(),
-          tensor_out.size(1),
-          tensor_out.size(2),
-          tensor_out.size(3));
+          output_address, tensor_out.mutable_data_ptr<char>(), c, h, w);
     } else {
       EXECUTORCH_PROF_SCOPE(
           event_tracer, "+EthosUBackend::execute()handles.output.memcpy()");
@@ -419,8 +416,8 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
       *tensor_count = *tensor_count * tensor.size(i);
     }
 
-    // The VelaIO type has a shape of fixed size 4
-    for (int i = 0; i < 4; i++) {
+    // The VelaIO type has a shape of fixed size 6
+    for (int i = 0; i < shapeDim; i++) {
       *io_count = *io_count * io->shape[i];
     }
   }
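
With six real dims (unused leading dims assumed to be 1, consistent with the shape checks below), the IO element count is simply the product over all shapeDim entries:

    import math

    io_shape = [1, 1, 1, 16, 16, 3]  # hypothetical 6D VelaIO shape
    io_count = math.prod(io_shape)   # 768 elements, same as 1 * 3 * 16 * 16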
@@ -436,17 +433,46 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
       // special case for NHWC workaround in AOT; as the compilation has
       // permuted to channel last in an undetectable way, we assume here
       // that the application has similarly permuted any input/output tensors.
-      permuted_shape = tensor.size(0) == io->shape[0] &&
-          tensor.size(1) == io->shape[3] && tensor.size(2) == io->shape[1] &&
-          tensor.size(3) == io->shape[2];
+      permuted_shape =
+          tensor.size(0) == io->shape[0] * io->shape[1] * io->shape[2] &&
+          tensor.size(1) == io->shape[5] && tensor.size(2) == io->shape[3] &&
+          tensor.size(3) == io->shape[4];
       if (permuted_shape) {
-        ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
+        ET_LOG(Debug, "4D tensor input/output %d will be permuted", index);
+      }
+    } else if (tensor.dim() == 5) {
+      // tensor has format NNCHW, but the VelaIO is in NNNHWC
+      permuted_shape = io->shape[0] == 1 && tensor.size(0) == io->shape[1] &&
+          tensor.size(1) == io->shape[2] && tensor.size(2) == io->shape[5] &&
+          tensor.size(3) == io->shape[3] && tensor.size(4) == io->shape[4];
+      if (permuted_shape) {
+        ET_LOG(Debug, "5D tensor input/output %d will be permuted", index);
       }
     }
     *is_permuted = permuted_shape;
     return Error::Ok;
   }
 
+  Error get_chw(const executorch::aten::Tensor tensor, int* c, int* h, int* w)
+      const {
+    if (tensor.dim() == 4) {
+      *c = tensor.size(1);
+      *h = tensor.size(2);
+      *w = tensor.size(3);
+    } else if (tensor.dim() == 5) {
+      *c = tensor.size(2);
+      *h = tensor.size(3);
+      *w = tensor.size(4);
+    } else {
+      ET_LOG(
+          Error,
+          "Unsupported output tensor dimension %d, expected 4 or 5",
+          tensor.dim());
+      return Error::InvalidProgram;
+    }
+    return Error::Ok;
+  }
+
   void permute_CHW_to_HWC(const char* input, char* output, int C, int H, int W)
       const {
     for (int i = 0; i != H * W; ++i) {
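
Summarizing the updated match: a channels-first tensor counts as "permuted" when its dims line up with the 6D channels-last VelaIO as below. A Python sketch of the same predicate (hypothetical helper, mirroring the checks above):

    def is_permuted(tensor_shape, io_shape):
        s = io_shape  # fixed 6D VelaIO shape, channels last
        if len(tensor_shape) == 4:   # NCHW vs (n0, n1, n2, H, W, C)
            n, c, h, w = tensor_shape
            return n == s[0] * s[1] * s[2] and (c, h, w) == (s[5], s[3], s[4])
        if len(tensor_shape) == 5:   # NNCHW vs (1, n0, n1, H, W, C)
            n0, n1, c, h, w = tensor_shape
            return s[0] == 1 and (n0, n1, c, h, w) == (s[1], s[2], s[5], s[3], s[4])
        return False

    assert is_permuted((1, 3, 16, 16), [1, 1, 1, 16, 16, 3])
    assert is_permuted((1, 2, 3, 16, 16), [1, 1, 2, 16, 16, 3])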

backends/arm/runtime/VelaBinStream.h
Lines changed: 4 additions & 2 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023-2024 Arm Limited and/or its affiliates.
+ * Copyright 2023-2025 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
@@ -34,9 +34,11 @@ typedef struct {
   char data[]; // block.name specific format data
 } VelaBinBlock;
 
+constexpr int shapeDim = 6; // Number of dimensions in VelaIO
+
 // A Vela input or output descriptor in the binary stream
 typedef struct {
-  int shape[4]; // Up to 4D shape of input or output
+  int shape[shapeDim]; // Shape of input or output
   int elem_size; // Element sizeof in bytes
   int offset; // Offset in bytes within SRAM working data
   int region; // Scratch region this belongs to
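
The descriptor now matches the nine-int record emitted by vela_bin_pack_io. An illustrative ctypes mirror (not part of the codebase):

    import ctypes

    SHAPE_DIM = 6  # mirrors shapeDim in VelaBinStream.h

    class VelaIO(ctypes.LittleEndianStructure):
        _fields_ = [
            ("shape", ctypes.c_int32 * SHAPE_DIM),  # 6D shape
            ("elem_size", ctypes.c_int32),          # element size in bytes
            ("offset", ctypes.c_int32),             # offset in SRAM scratch
            ("region", ctypes.c_int32),             # scratch region index
        ]

    assert ctypes.sizeof(VelaIO) == 9 * 4           # nine int32 fields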

backends/arm/test/ops/test_squeeze.py
Lines changed: 3 additions & 0 deletions
@@ -29,6 +29,7 @@ class SqueezeDim(torch.nn.Module):
         "squeeze3d_dim_neg_2": lambda: (torch.randn(1, 1, 5), -2),
         "squeeze4d_dim_pos_3": lambda: (torch.randn(1, 2, 3, 1), 3),
         "squeeze4d_dim_neg_2": lambda: (torch.randn(1, 5, 1, 5), -2),
+        "squeeze5d_dim_neg_2": lambda: (torch.randn(1, 1, 5, 1, 5), -2),
     }
 
     def forward(self, x: torch.Tensor, dim: int):
@@ -40,6 +41,7 @@ class SqueezeDims(torch.nn.Module):
         "squeeze3d_dims_0_1": lambda: (torch.randn(1, 1, 5), (0, 1)),
         "squeeze4d_dims_0_neg_1": lambda: (torch.randn(1, 5, 5, 1), (0, -1)),
         "squeeze4d_dims_0_neg_2": lambda: (torch.randn(1, 5, 1, 5), (0, -2)),
+        "squeeze5d_dims_0_neg_2": lambda: (torch.randn(1, 1, 5, 1, 5), (0, -2)),
     }
 
     def forward(self, x: torch.Tensor, dims: tuple[int]):
@@ -51,6 +53,7 @@ class Squeeze(torch.nn.Module):
         "squeeze3d": lambda: (torch.randn(1, 1, 5),),
         "squeeze4d_dims": lambda: (torch.randn(1, 5, 5, 1),),
         "squeeze3d_dims_mix": lambda: (torch.randn(1, 5, 1, 5),),
+        "squeeze4d_dims_mix": lambda: (torch.randn(1, 1, 5, 1, 5),),
     }
 
     def forward(self, x: torch.Tensor):

backends/arm/test/ops/test_unflatten.py
Lines changed: 33 additions & 3 deletions
@@ -9,6 +9,8 @@
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineINT,
+    EthosU85PipelineINT,
     TosaPipelineFP,
     TosaPipelineINT,
     VgfPipeline,
@@ -30,8 +32,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return torch.unflatten(x, self.dim, self.sizes)
 
     test_data: dict[str, test_data_t] = {
-        "randn_4d": (lambda: (Unflatten(1, (2, 2)), (torch.randn(3, 4, 5, 1),))),
-        "rand_3d": (lambda: (Unflatten(1, (-1, 2)), (torch.rand(3, 4, 4),))),
+        "rand_3d_batch3": (lambda: (Unflatten(1, (-1, 2)), (torch.rand(3, 4, 4),))),
+        "rand_3d_batch1": (lambda: (Unflatten(1, (-1, 2)), (torch.rand(1, 4, 4),))),
+        "randn_4d_dim1": (lambda: (Unflatten(1, (2, 2)), (torch.randn(3, 4, 5, 1),))),
+        "randn_4d_dim3": (lambda: (Unflatten(3, (2, 2)), (torch.randn(1, 1, 5, 4),))),
     }
 
 
@@ -49,7 +53,33 @@ def test_unflatten_int_tosa_FP(test_data: test_data_t):
 @common.parametrize("test_data", Unflatten.test_data)
 def test_unflatten_int_tosa_INT(test_data: test_data_t):
     module, inputs = test_data()
-    pipeline = TosaPipelineINT[input_t](
+    pipeline = TosaPipelineINT[input_t](module, inputs, Unflatten.aten_op)
+    pipeline.run()
+
+
+xfails = {
+    "rand_3d_batch3": "Batch size > 1 currently not supported for FVP tests",
+    "randn_4d_dim1": "Batch size > 1 currently not supported for FVP tests",
+}
+
+
+@common.parametrize("test_data", Unflatten.test_data, xfails=xfails, strict=False)
+@common.XfailIfNoCorstone300
+def test_unflatten_int_u55_INT(test_data: test_data_t):
+    module, inputs = test_data()
+    pipeline = EthosU55PipelineINT[input_t](
+        module,
+        inputs,
+        Unflatten.aten_op,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Unflatten.test_data, xfails=xfails, strict=False)
+@common.XfailIfNoCorstone320
+def test_unflatten_int_u85_INT(test_data: test_data_t):
+    module, inputs = test_data()
+    pipeline = EthosU85PipelineINT[input_t](
         module,
         inputs,
         Unflatten.aten_op,
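
The new "randn_4d_dim3" case is what drives the 5D runtime path: unflattening the last dim of a 4D tensor yields rank 5. For example:

    import torch

    x = torch.randn(1, 1, 5, 4)
    y = torch.unflatten(x, 3, (2, 2))   # as in the "randn_4d_dim3" case
    assert y.shape == (1, 1, 5, 2, 2)   # rank-5 tensor reaching the backend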

backends/arm/test/ops/test_unsqueeze.py
Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 
 
 class Unsqueeze(torch.nn.Module):
-    shapes: list[int | Sequence[int]] = [5, (5, 5), (5, 4), (5, 4, 3)]
+    shapes: list[int | Sequence[int]] = [5, (5, 5), (5, 4), (5, 4, 3), (1, 5, 4, 3)]
     test_parameters = {}
     for n in shapes:
         test_parameters[f"rand_{n}"] = (torch.randn(n),)

backends/arm/test/ops/test_view.py
Lines changed: 10 additions & 2 deletions
@@ -44,6 +44,10 @@ class View(torch.nn.Module):
         "rand_4d_4_3": lambda: (torch.rand(5, 10, 1, 1), (1, 25, 2)),
         "rand_4d_4_2": lambda: (torch.rand(2, 50, 1, 1), (1, 100)),
         "rand_4d_2_4_same": lambda: (torch.rand(2, 3, 2, 3), (2, 3, 3, 2)),
+        "rand_4d_5d": lambda: (torch.rand(1, 3, 4, 5), (1, 1, 4, 5, -1)),
+        "rand_5d_5d": lambda: (torch.rand(1, 1, 4, 5, 6), (1, 1, 4, -1, 6)),
+        "rand_5d_3d": lambda: (torch.rand(1, 1, 4, 5, 6), (2, 3, -1)),
+        "rand_3d_5d": lambda: (torch.rand(4, 5, 6), (1, 1, 2, -1, 3)),
     }
 
     rank_product_too_large = {
@@ -97,7 +101,9 @@ def test_view_tosa_INT(test_data: Tuple):
 }
 
 
-@common.parametrize("test_data", View.needs_transpose_tests, xfails=xfails)
+@common.parametrize(
+    "test_data", View.needs_transpose_tests, xfails=xfails, strict=False
+)
 @common.XfailIfNoCorstone300
 def test_view_u55_INT(test_data: Tuple):
     test_tensor, new_shape = test_data()
@@ -151,7 +157,9 @@ def test_view_u55_INT_not_delegated(test_data: Tuple):
     pipeline.run()
 
 
-@common.parametrize("test_data", View.needs_transpose_tests, xfails=xfails)
+@common.parametrize(
+    "test_data", View.needs_transpose_tests, xfails=xfails, strict=False
+)
 @common.XfailIfNoCorstone320
 def test_view_u85_INT(test_data: Tuple):
     test_tensor, new_shape = test_data()

examples/arm/setup.sh
Lines changed: 2 additions & 2 deletions
@@ -60,7 +60,7 @@ fi
 
 # Vela
 vela_repo_url="https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela"
-vela_rev="d37febc1715edf0d236c2ff555739a8a9aadcf9a"
+vela_rev="5e7cee87e90f433e85c567ece1ebdede2328da1a"
 
 # MLSDK dependencies
 mlsdk_manifest_dir="ml-sdk-for-vulkan-manifest"
@@ -299,7 +299,7 @@ function select_toolchain() {
         fi
     elif [[ "${OS}" == "Linux" ]]; then
        if [[ "${target_toolchain}" == "zephyr" ]]; then
-            # eventually, this can be support by downloading the the toolchain from
+            # eventually, this can be supported by downloading the toolchain from
            # "https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.17.2/toolchain_linux-aarch64_arm-zephyr-eabi.tar.xz"
            # but for now, we error if user tries to specify this
            echo "[main] Error: currently target_toolchain zephyr is only supported for x86-64 Linux host systems!"; exit 1;
