Skip to content

Commit df556e6

Browse files
Arm backend: Update VelaIO handling
VelaIO shapes are always 6D. - Updates AOT handling of metadata from Vela. - Adds unit tests to trigger 5D cases. - Updates EthosUBackend to read IO as 6D arrays. Signed-off-by: Oscar Andersson <[email protected]> Change-Id: I8d7d3a44ac84e5bb14fa27e7b7765c3b7a8ee483
1 parent ecb639a commit df556e6

File tree

8 files changed

+99
-28
lines changed

8 files changed

+99
-28
lines changed

backends/arm/arm_vela.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,19 @@
2525
# per-io structs to simplify runtime use.
2626
def vela_bin_pack_io(prefix, data):
2727
vela_input_shapes = data[prefix + "_shape"]
28+
# Vela input/output shape is fixed to 6D
29+
vela_io_shape_dims = 6
2830

2931
ios = struct.pack("<i", len(vela_input_shapes))
3032
for i in range(len(vela_input_shapes)):
3133
io_shape = vela_input_shapes[i]
3234
io_elem_size = data[prefix + "_elem_size"][i]
3335
io_offset = data[prefix + "_offset"][i]
3436
io_region = data[prefix + "_region"][i]
35-
assert len(io_shape) <= 4
36-
inp_pad = io_shape.tolist() + [0] * (4 - len(io_shape))
37+
assert len(io_shape) == vela_io_shape_dims
38+
inp_pad = io_shape.tolist()
3739
io_struct = struct.pack(
38-
"<iiiiiii", *inp_pad, io_elem_size, io_offset, io_region
40+
"<iiiiiiiii", *inp_pad, io_elem_size, io_offset, io_region
3941
)
4042
ios += io_struct
4143
return ios

backends/arm/runtime/EthosUBackend.cpp

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -279,12 +279,11 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
279279
event_tracer,
280280
"+EthosUBackend::execute()handles.input.permute_CHW_to_HWC()");
281281
// permuted byte copy CHW to HWC
282+
int c, h, w;
283+
ET_CHECK_OK_OR_RETURN_ERROR(get_chw(tensor_in, &c, &h, &w));
284+
282285
permute_CHW_to_HWC(
283-
tensor_in.mutable_data_ptr<char>(),
284-
scratch_addr,
285-
tensor_in.size(1),
286-
tensor_in.size(2),
287-
tensor_in.size(3));
286+
tensor_in.mutable_data_ptr<char>(), scratch_addr, c, h, w);
288287
} else if (both_char || both_int || both_short || both_bool) {
289288
EXECUTORCH_PROF_SCOPE(
290289
event_tracer, "+EthosUBackend::execute()handles.input.memcpy()");
@@ -381,13 +380,11 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
381380
"+EthosUBackend::execute()handles.output.permute_HWC_to_CHW()");
382381

383382
const char* output_address = static_cast<const char*>(output_addr);
383+
int c, h, w;
384+
ET_CHECK_OK_OR_RETURN_ERROR(get_chw(tensor_out, &c, &h, &w));
384385

385386
permute_HWC_to_CHW(
386-
output_address,
387-
tensor_out.mutable_data_ptr<char>(),
388-
tensor_out.size(1),
389-
tensor_out.size(2),
390-
tensor_out.size(3));
387+
output_address, tensor_out.mutable_data_ptr<char>(), c, h, w);
391388
} else {
392389
EXECUTORCH_PROF_SCOPE(
393390
event_tracer, "+EthosUBackend::execute()handles.output.memcpy()");
@@ -421,8 +418,8 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
421418
*tensor_count = *tensor_count * tensor.size(i);
422419
}
423420

424-
// The VelaIO type has a shape of fixed size 4
425-
for (int i = 0; i < 4; i++) {
421+
// The VelaIO type has a shape of fixed size 6
422+
for (int i = 0; i < shapeDim; i++) {
426423
*io_count = *io_count * io->shape[i];
427424
}
428425
}
@@ -438,17 +435,46 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
438435
// special case for NHWC workaround in AOT; as the compilation has
439436
// permuted to channel last in an undetectable way, we assume here
440437
// that the application has similarly permuted any input/output tensors.
441-
permuted_shape = tensor.size(0) == io->shape[0] &&
442-
tensor.size(1) == io->shape[3] && tensor.size(2) == io->shape[1] &&
443-
tensor.size(3) == io->shape[2];
438+
permuted_shape =
439+
tensor.size(0) == io->shape[0] * io->shape[1] * io->shape[2] &&
440+
tensor.size(1) == io->shape[5] && tensor.size(2) == io->shape[3] &&
441+
tensor.size(3) == io->shape[4];
444442
if (permuted_shape) {
445-
ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
443+
ET_LOG(Debug, "4D tensor input/output %d will be permuted", index);
444+
}
445+
} else if (tensor.dim() == 5) {
446+
// tensor has format NNCHW, but the VelaIO is in NNNHWC
447+
permuted_shape = io->shape[0] == 1 && tensor.size(0) == io->shape[1] &&
448+
tensor.size(1) == io->shape[2] && tensor.size(2) == io->shape[5] &&
449+
tensor.size(3) == io->shape[3] && tensor.size(4) == io->shape[4];
450+
if (permuted_shape) {
451+
ET_LOG(Debug, "5D tensor input/output %d will be permuted", index);
446452
}
447453
}
448454
*is_permuted = permuted_shape;
449455
return Error::Ok;
450456
}
451457

458+
Error get_chw(const executorch::aten::Tensor tensor, int* c, int* h, int* w)
459+
const {
460+
if (tensor.dim() == 4) {
461+
*c = tensor.size(1);
462+
*h = tensor.size(2);
463+
*w = tensor.size(3);
464+
} else if (tensor.dim() == 5) {
465+
*c = tensor.size(2);
466+
*h = tensor.size(3);
467+
*w = tensor.size(4);
468+
} else {
469+
ET_LOG(
470+
Error,
471+
"Unsupported output tensor dimension %d, expected 4 or 5",
472+
tensor.dim());
473+
return Error::InvalidProgram;
474+
}
475+
return Error::Ok;
476+
}
477+
452478
void permute_CHW_to_HWC(const char* input, char* output, int C, int H, int W)
453479
const {
454480
for (int i = 0; i != H * W; ++i) {

backends/arm/runtime/VelaBinStream.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 Arm Limited and/or its affiliates.
2+
* Copyright 2023-2025 Arm Limited and/or its affiliates.
33
*
44
* This source code is licensed under the BSD-style license found in the
55
* LICENSE file in the root directory of this source tree.
@@ -34,9 +34,11 @@ typedef struct {
3434
char data[]; // block.name specific format data
3535
} VelaBinBlock;
3636

37+
constexpr int shapeDim = 6; // Number of dimensions in VelaIO
38+
3739
// A Vela input or output descriptor in the binary stream
3840
typedef struct {
39-
int shape[4]; // Up to 4D shape of input or output
41+
int shape[shapeDim]; // Shape of input or output
4042
int elem_size; // Element sizeof in bytes
4143
int offset; // Offset in bytes within SRAM working data
4244
int region; // Scratch region this belongs to

backends/arm/test/ops/test_squeeze.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class SqueezeDim(torch.nn.Module):
2929
"squeeze3d_dim_neg_2": lambda: (torch.randn(1, 1, 5), -2),
3030
"squeeze4d_dim_pos_3": lambda: (torch.randn(1, 2, 3, 1), 3),
3131
"squeeze4d_dim_neg_2": lambda: (torch.randn(1, 5, 1, 5), -2),
32+
"squeeze5d_dim_neg_2": lambda: (torch.randn(1, 1, 5, 1, 5), -2),
3233
}
3334

3435
def forward(self, x: torch.Tensor, dim: int):
@@ -40,6 +41,7 @@ class SqueezeDims(torch.nn.Module):
4041
"squeeze3d_dims_0_1": lambda: (torch.randn(1, 1, 5), (0, 1)),
4142
"squeeze4d_dims_0_neg_1": lambda: (torch.randn(1, 5, 5, 1), (0, -1)),
4243
"squeeze4d_dims_0_neg_2": lambda: (torch.randn(1, 5, 1, 5), (0, -2)),
44+
"squeeze5d_dims_0_neg_2": lambda: (torch.randn(1, 1, 5, 1, 5), (0, -2)),
4345
}
4446

4547
def forward(self, x: torch.Tensor, dims: tuple[int]):
@@ -51,6 +53,7 @@ class Squeeze(torch.nn.Module):
5153
"squeeze3d": lambda: (torch.randn(1, 1, 5),),
5254
"squeeze4d_dims": lambda: (torch.randn(1, 5, 5, 1),),
5355
"squeeze3d_dims_mix": lambda: (torch.randn(1, 5, 1, 5),),
56+
"squeeze4d_dims_mix": lambda: (torch.randn(1, 1, 5, 1, 5),),
5457
}
5558

5659
def forward(self, x: torch.Tensor):

backends/arm/test/ops/test_unflatten.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import torch
1010
from executorch.backends.arm.test import common
1111
from executorch.backends.arm.test.tester.test_pipeline import (
12+
EthosU55PipelineINT,
13+
EthosU85PipelineINT,
1214
TosaPipelineFP,
1315
TosaPipelineINT,
1416
VgfPipeline,
@@ -30,8 +32,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
3032
return torch.unflatten(x, self.dim, self.sizes)
3133

3234
test_data: dict[str, test_data_t] = {
33-
"randn_4d": (lambda: (Unflatten(1, (2, 2)), (torch.randn(3, 4, 5, 1),))),
34-
"rand_3d": (lambda: (Unflatten(1, (-1, 2)), (torch.rand(3, 4, 4),))),
35+
"rand_3d_batch3": (lambda: (Unflatten(1, (-1, 2)), (torch.rand(3, 4, 4),))),
36+
"rand_3d_batch1": (lambda: (Unflatten(1, (-1, 2)), (torch.rand(1, 4, 4),))),
37+
"randn_4d_dim1": (lambda: (Unflatten(1, (2, 2)), (torch.randn(3, 4, 5, 1),))),
38+
"randn_4d_dim3": (lambda: (Unflatten(3, (2, 2)), (torch.randn(1, 1, 5, 4),))),
3539
}
3640

3741

@@ -49,7 +53,33 @@ def test_unflatten_int_tosa_FP(test_data: test_data_t):
4953
@common.parametrize("test_data", Unflatten.test_data)
5054
def test_unflatten_int_tosa_INT(test_data: test_data_t):
5155
module, inputs = test_data()
52-
pipeline = TosaPipelineINT[input_t](
56+
pipeline = TosaPipelineINT[input_t](module, inputs, Unflatten.aten_op)
57+
pipeline.run()
58+
59+
60+
xfails = {
61+
"rand_3d_batch3": "Batch size > 1 currently not supported for FVP tests",
62+
"randn_4d_dim1": "Batch size > 1 currently not supported for FVP tests",
63+
}
64+
65+
66+
@common.parametrize("test_data", Unflatten.test_data, xfails=xfails, strict=False)
67+
@common.XfailIfNoCorstone300
68+
def test_unflatten_int_u55_INT(test_data: test_data_t):
69+
module, inputs = test_data()
70+
pipeline = EthosU55PipelineINT[input_t](
71+
module,
72+
inputs,
73+
Unflatten.aten_op,
74+
)
75+
pipeline.run()
76+
77+
78+
@common.parametrize("test_data", Unflatten.test_data, xfails=xfails, strict=False)
79+
@common.XfailIfNoCorstone320
80+
def test_unflatten_int_u85_INT(test_data: test_data_t):
81+
module, inputs = test_data()
82+
pipeline = EthosU85PipelineINT[input_t](
5383
module,
5484
inputs,
5585
Unflatten.aten_op,

backends/arm/test/ops/test_unsqueeze.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626

2727
class Unsqueeze(torch.nn.Module):
28-
shapes: list[int | Sequence[int]] = [5, (5, 5), (5, 4), (5, 4, 3)]
28+
shapes: list[int | Sequence[int]] = [5, (5, 5), (5, 4), (5, 4, 3), (1, 5, 4, 3)]
2929
test_parameters = {}
3030
for n in shapes:
3131
test_parameters[f"rand_{n}"] = (torch.randn(n),)

backends/arm/test/ops/test_view.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ class View(torch.nn.Module):
4444
"rand_4d_4_3": lambda: (torch.rand(5, 10, 1, 1), (1, 25, 2)),
4545
"rand_4d_4_2": lambda: (torch.rand(2, 50, 1, 1), (1, 100)),
4646
"rand_4d_2_4_same": lambda: (torch.rand(2, 3, 2, 3), (2, 3, 3, 2)),
47+
"rand_4d_5d": lambda: (torch.rand(1, 3, 4, 5), (1, 1, 4, 5, -1)),
48+
"rand_5d_5d": lambda: (torch.rand(1, 1, 4, 5, 6), (1, 1, 4, -1, 6)),
49+
"rand_5d_3d": lambda: (torch.rand(1, 1, 4, 5, 6), (2, 3, -1)),
50+
"rand_3d_5d": lambda: (torch.rand(4, 5, 6), (1, 1, 2, -1, 3)),
4751
}
4852

4953
rank_product_too_large = {
@@ -97,7 +101,9 @@ def test_view_tosa_INT(test_data: Tuple):
97101
}
98102

99103

100-
@common.parametrize("test_data", View.needs_transpose_tests, xfails=xfails)
104+
@common.parametrize(
105+
"test_data", View.needs_transpose_tests, xfails=xfails, strict=False
106+
)
101107
@common.XfailIfNoCorstone300
102108
def test_view_u55_INT(test_data: Tuple):
103109
test_tensor, new_shape = test_data()
@@ -151,7 +157,9 @@ def test_view_u55_INT_not_delegated(test_data: Tuple):
151157
pipeline.run()
152158

153159

154-
@common.parametrize("test_data", View.needs_transpose_tests, xfails=xfails)
160+
@common.parametrize(
161+
"test_data", View.needs_transpose_tests, xfails=xfails, strict=False
162+
)
155163
@common.XfailIfNoCorstone320
156164
def test_view_u85_INT(test_data: Tuple):
157165
test_tensor, new_shape = test_data()

examples/arm/setup.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ fi
6060

6161
# Vela
6262
vela_repo_url="https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela"
63-
vela_rev="d37febc1715edf0d236c2ff555739a8a9aadcf9a"
63+
vela_rev="9a43a1bf26bfc7588358d7e6e6bb2613b4981a34"
6464

6565
# MLSDK dependencies
6666
mlsdk_manifest_dir="ml-sdk-for-vulkan-manifest"

0 commit comments

Comments
 (0)