
Commit 03abf61

jirioc and StrycekSimon authored and committed
NXP backend: Make the flow robust against input/output swapping.
1 parent de0554d commit 03abf61

4 files changed: +182 -81 lines changed


backends/nxp/neutron_node_extraction.py

Lines changed: 43 additions & 2 deletions

@@ -6,7 +6,6 @@
 from dataclasses import dataclass
 
 import numpy as np
-
 from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
     BuiltinOperator,
 )
@@ -15,6 +14,10 @@
 
 @dataclass
 class NeutronNodeArtifacts:
+    input_names: list[str]
+    input_indices: list[int]
+    output_names: list[str]
+    output_indices: list[int]
     microcode: np.ndarray
     weights: np.ndarray
     kernels: np.ndarray
@@ -99,4 +102,42 @@ def extract_artifacts_from_neutron_node(
         microcode.dtype == weights.dtype == kernels.dtype == np.dtype("uint8")
     ), "The Neutron Node uses unexpected data types."
 
-    return NeutronNodeArtifacts(microcode, weights, kernels)
+    input_names = []
+    input_indices = []
+    graph_inputs = sub_graph.InputsAsNumpy()
+    node_inputs = neutron_node.InputsAsNumpy()[:-3]
+    for tensor_idx in node_inputs:
+        which_graph_input = np.where(graph_inputs == tensor_idx)[0]
+        assert (
+            which_graph_input.size == 1
+        ), "Mismatch between Neutron Node inputs and graph inputs."
+        input_indices.append(which_graph_input[0])
+        input_names.append(sub_graph.Tensors(graph_inputs[which_graph_input[0]]).Name())
+
+    assert (
+        neutron_node.OutputsLength() >= 2
+    ), f"The Neutron Node only has `{neutron_node.OutputsLength()}` outputs. Expected at least `2` including the scratch buffer."
+
+    output_names = []
+    output_indices = []
+    graph_outputs = sub_graph.OutputsAsNumpy()
+    node_outputs = neutron_node.OutputsAsNumpy()[:-1]
+    for tensor_idx in node_outputs:
+        which_graph_output = np.where(graph_outputs == tensor_idx)[0]
+        assert (
+            which_graph_output.size == 1
+        ), "Mismatch between Neutron Node outputs and graph outputs."
+        output_indices.append(which_graph_output[0])
+        output_names.append(
+            sub_graph.Tensors(graph_outputs[which_graph_output[0]]).Name()
+        )
+
+    return NeutronNodeArtifacts(
+        input_names,
+        input_indices,
+        output_names,
+        output_indices,
+        microcode,
+        weights,
+        kernels,
+    )
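
The new extraction logic above recovers, for every Neutron Node input and output, the position of the corresponding tensor in the subgraph's own input/output lists, so the runtime no longer has to assume the two orderings match. Below is a minimal sketch of that matching step with made-up tensor ids standing in for the flatbuffer accessors; `graph_inputs`, `node_inputs` and the printed result are illustrative values, not taken from the commit.

import numpy as np

# Hypothetical example: the subgraph signature lists tensor ids [4, 7, 9] as its
# inputs, while the Neutron Node consumes them in the order [9, 4, 7] (with the
# three trailing Neutron-specific inputs already stripped, as in the code above).
graph_inputs = np.array([4, 7, 9])
node_inputs = np.array([9, 4, 7])

input_indices = []
for tensor_idx in node_inputs:
    # Position of this node input within the graph signature.
    which_graph_input = np.where(graph_inputs == tensor_idx)[0]
    assert which_graph_input.size == 1, "Node input not found among graph inputs."
    input_indices.append(int(which_graph_input[0]))

print(input_indices)  # [2, 0, 1]: Neutron input i is fed from user input input_indices[i]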

backends/nxp/nxp_backend.py

Lines changed: 49 additions & 21 deletions

@@ -245,39 +245,67 @@ def _format_string_for_array(self, array: np.ndarray) -> str:
 
         return f"{array.size}s{self._padding_format_string_for_array(array)}"
 
-    def _create_payload_header(self, io_formats) -> np.ndarray:
+    def _create_payload_header(self, io_formats, neutron_artifacts) -> np.ndarray:
         """
         Create bytes header for returned payload. It contains information about
         input and output tensor formats. Tensors are ordered based on graph signature
         of ExportedProgram. Header schema:
 
-        +----------------------------------+-----------------------------------+
-        | Input TensorFormats length (1B)  | Output TensorFormats length (1B)  |
-        +----------------------------------+-----------------------------------+
-        | 1st input tensor format (1B)     | [nth* input tensor format (1B)]   |
-        +----------------------------------+-----------------------------------+
-        | 1st output tensor format (1B)    | [nth* output tensor format (1B)]  |
-        +----------------------------------+-----------------------------------+
+        +----------------------------+-----------------------------+------------------------+
+        | Neutron inputs length (1B) | Neutron outputs length (1B) | Input args length (1B) |
+        +----------------------------+-----------+-----------------+------------------------+
+        | 1st input tensor format (1B)           | [nth* input tensor format (1B)]          |
+        +----------------------------------------+------------------------------------------+
+        | 1st output tensor format (1B)          | [nth* output tensor format (1B)]         |
+        +----------------------------------------+------------------------------------------+
+        | 1st input map (1B)                     | [nth* input map (1B)]                    |
+        +----------------------------------------+------------------------------------------+
+        | 1st output map (1B)                    | [nth* output map (1B)]                   |
+        +----------------------------------------+------------------------------------------+
 
         :param io_formats: IO tensors formats.
         :return: Bytes representation of payload header.
         """
         inputs = io_formats["inputs"]
         outputs = io_formats["outputs"]
 
-        assert len(inputs) < 256, "Models with more than 255 inputs are not supported."
         assert (
-            len(outputs) < 256
+            len(neutron_artifacts.input_indices) < 256
+        ), "Models with more than 255 inputs are not supported."
+        assert (
+            len(neutron_artifacts.output_indices) < 256
         ), "Models with more than 255 outputs are not supported."
 
-        header_data = [len(inputs)]
-        header_data.append(len(outputs))
+        header_data = [len(neutron_artifacts.input_indices)]
+        header_data.append(len(neutron_artifacts.output_indices))
+        header_data.append(len(inputs))
 
-        for _tensor, tensor_format in inputs.items():
-            header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
+        for input_name in neutron_artifacts.input_names:
+            try:
+                header_data.append(
+                    1
+                    if inputs[input_name.decode()] == TensorFormat.CHANNELS_LAST
+                    else 0
+                )
+            except KeyError:
+                raise AssertionError(
+                    f"Input tensor `{input_name.decode()}` not found in the converted model."
+                )
 
-        for _tensor, tensor_format in outputs.items():
-            header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
+        for output_name in neutron_artifacts.output_names:
+            try:
+                header_data.append(
+                    1
+                    if outputs[output_name.decode()] == TensorFormat.CHANNELS_LAST
+                    else 0
+                )
+            except KeyError:
+                raise AssertionError(
+                    f"Output tensor `{output_name.decode()}` not found in the converted model."
+                )
+
+        header_data.extend(neutron_artifacts.input_indices)
+        header_data.extend(neutron_artifacts.output_indices)
 
         # noinspection PyTypeChecker
         return np.array(header_data, dtype=np.uint8)
@@ -314,9 +342,9 @@ def get_binary_payload(self, io_formats, neutron_model) -> bytes:
 
         +----------------------------------------------------------------------------------------------------------------+
         |                                            16 bytes aligned blocks                                              |
-        +============================+=============================+===========================+============================+
-        | Input formats length (1B)  | Output formats length (1B)  | [nth* input format (1B)]  | [nth* output format (1B)]  |
-        +----------------------------+-----------------------------+---------------------------+----------------------------+
+        +================================================================================================================+
+        |                                                     Header                                                      |
+        +----------------------------------------------------------------------------------------------------------------+
         |                                             Neutron microcode                                                   |
         +----------------------------------------------------------------------------------------------------------------+
         |                                              Neutron weights                                                    |
@@ -331,9 +359,9 @@ def get_binary_payload(self, io_formats, neutron_model) -> bytes:
         :param neutron_model: Neutron model with single NeutronGraph node.
         :return: 16 bytes aligned binary payload.
         """
-        header = self._create_payload_header(io_formats)
-
         # Extract the Neutron microcode, weights and kernels from the Neutron Node in the `neutron_model`.
         neutron_artifacts = extract_artifacts_from_neutron_node(neutron_model)
 
+        header = self._create_payload_header(io_formats, neutron_artifacts)
+
        return self._pack_with_alignment(header, neutron_artifacts)
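
Put together, the header now carries three length bytes, the per-tensor format flags, and the new input/output maps. Here is a rough sketch of what a header with this layout would look like for a hypothetical model with two Neutron inputs, one Neutron output and two input args; all flag and map values below are invented for illustration and are not taken from the commit.

import numpy as np

# Hypothetical values, mirroring the header schema documented above.
num_neutron_inputs = 2   # Neutron inputs length (1B)
num_neutron_outputs = 1  # Neutron outputs length (1B)
num_input_args = 2       # Input args length (1B)
input_formats = [1, 0]   # 1 = CHANNELS_LAST, 0 = otherwise, one byte per Neutron input
output_formats = [1]     # one byte per Neutron output
input_map = [1, 0]       # Neutron input i reads user arg input_map[i]
output_map = [0]         # Neutron output i writes user output output_map[i]

header_data = [num_neutron_inputs, num_neutron_outputs, num_input_args]
header_data += input_formats + output_formats + input_map + output_map
header = np.array(header_data, dtype=np.uint8)

print(header)  # [2 1 2 1 0 1 1 0 0] -- 9 bytes, padded to a 16-byte boundary when packed into the payload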

backends/nxp/runtime/NeutronBackend.cpp

Lines changed: 74 additions & 48 deletions

@@ -25,37 +25,52 @@ namespace neutron {
 #define ALIGN_SIZE(size) \
   ((size + BUFFER_ALIGNMENT - 1) & (~(BUFFER_ALIGNMENT - 1)))
 
+// clang-format off
 /* Header schema:
-   +----------------------------------+-----------------------------------+
-   | Input TensorFormats length (1B)  | Output TensorFormats length (1B)  |
-   +----------------------------------+-----------------------------------+
-   | 1st input tensor format (1B)     | [nth* input tensor format (1B)]   |
-   +----------------------------------+-----------------------------------+
-   | 1st output tensor format (1B)    | [nth* output tensor format (1B)]  |
-   +----------------------------------+-----------------------------------+
+   +----------------------------+-----------------------------+------------------------+
+   | Neutron inputs length (1B) | Neutron outputs length (1B) | Input args length (1B) |
+   +----------------------------+-----------+-----------------+------------------------+
+   | 1st input tensor format (1B)           | [nth* input tensor format (1B)]          |
+   +----------------------------------------+------------------------------------------+
+   | 1st output tensor format (1B)          | [nth* output tensor format (1B)]         |
+   +----------------------------------------+------------------------------------------+
+   | 1st input map (1B)                     | [nth* input map (1B)]                    |
+   +----------------------------------------+------------------------------------------+
+   | 1st output map (1B)                    | [nth* output map (1B)]                   |
+   +----------------------------------------+------------------------------------------+
 */
+// clang-format on
 #define ITEM_SIZE 1 // 1 Byte
 #define INPUT_TENSOR_FORMAT_LEN_POS 0
 #define OUTPUT_TENSOR_FORMAT_LEN_POS 1
-#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 2 * ITEM_SIZE)
+#define INPUT_ARGS_LEN_POS 2
+#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 3 * ITEM_SIZE)
 #define OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(base) \
-  (base + 2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
-#define PAYLOAD_ADDR(base) \
-  (base + \
-   ALIGN_SIZE( \
-       2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS] + \
-       base[OUTPUT_TENSOR_FORMAT_LEN_POS]))
+  (base + 3 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
+#define INPUT_TENSOR_MAP_ARRAY_ADDR(base) \
+  (base + 3 * ITEM_SIZE + 1 * base[INPUT_TENSOR_FORMAT_LEN_POS] + \
+   1 * base[OUTPUT_TENSOR_FORMAT_LEN_POS])
+#define OUTPUT_TENSOR_MAP_ARRAY_ADDR(base) \
+  (base + 3 * ITEM_SIZE + 2 * base[INPUT_TENSOR_FORMAT_LEN_POS] + \
+   1 * base[OUTPUT_TENSOR_FORMAT_LEN_POS])
+#define PAYLOAD_ADDR(base) \
+  (base + \
+   ALIGN_SIZE( \
+       3 * ITEM_SIZE + 2 * base[INPUT_TENSOR_FORMAT_LEN_POS] + \
+       2 * base[OUTPUT_TENSOR_FORMAT_LEN_POS]))
 
 // Aggregate neutron model handle and data structures into one.
 typedef struct {
   int numInputs = 0;
   int numOutputs = 0;
-  uint32_t scratchSize = 0;
+  int numInputArgs = 0;
   NeutronModelConfig mcfg;
   NeutronDataConfig dcfg;
   NeutronModelHandle nmh = NULL;
   const uint8_t* inputTranspositionFlags;
   const uint8_t* outputTranspositionFlags;
+  const uint8_t* inputMap;
+  const uint8_t* outputMap;
 } NeutronConfig;
 
 // Applied on outputs.
@@ -210,6 +225,15 @@ void transposeOutput(
   }
 }
 
+bool multipleChannelsPresent(const ArrayRef<exec_aten::SizesType>& sizes) {
+  size_t length = sizes.size();
+  if (length < 3) {
+    return true;
+  }
+  size_t C = sizes[length - 3];
+  return C != 1;
+}
+
 class NeutronBackend final : public PyTorchBackendInterface {
  public:
   NeutronBackend() {}
@@ -234,17 +258,19 @@ class NeutronBackend final : public PyTorchBackendInterface {
     // cfg->mcfg.microcode
     // cfg->mcfg.weights
     // cfg->mcfg.kernels
-    const uint8_t* transpositionFlags =
+    const uint8_t* payloadFlags =
         static_cast<const uint8_t*>(processed->data());
-    int numInputs = transpositionFlags[INPUT_TENSOR_FORMAT_LEN_POS];
-    int numOutputs = transpositionFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
-    cfg->inputTranspositionFlags =
-        INPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
+    uint32_t numInputs = payloadFlags[INPUT_TENSOR_FORMAT_LEN_POS];
+    uint32_t numOutputs = payloadFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
+    cfg->numInputArgs = payloadFlags[INPUT_ARGS_LEN_POS];
+    cfg->inputTranspositionFlags = INPUT_TENSOR_FORMAT_ARRAY_ADDR(payloadFlags);
     cfg->outputTranspositionFlags =
-        OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
+        OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(payloadFlags);
+    cfg->inputMap = INPUT_TENSOR_MAP_ARRAY_ADDR(payloadFlags);
+    cfg->outputMap = OUTPUT_TENSOR_MAP_ARRAY_ADDR(payloadFlags);
 
     const uint32_t* buffer = static_cast<const uint32_t*>(
-        static_cast<const void*> PAYLOAD_ADDR(transpositionFlags));
+        static_cast<const void*> PAYLOAD_ADDR(payloadFlags));
     uint32_t magicWord = buffer[0];
     // Check valid microcode.
     if (magicWord != 0x64434D6E) {
@@ -314,39 +340,37 @@ class NeutronBackend final : public PyTorchBackendInterface {
     cfg->dcfg.outputs[cfg->numOutputs] =
         static_cast<void*>(context.allocate(cfg->scratchSize, 16));
 
-    // Set inputs and outputs from args.
+    // Set inputs from args.
+    // Transpose inputs if needed.
     for (int i = 0; i < cfg->numInputs; i++) {
-      cfg->dcfg.inputs[i] = args[i]->toTensor().const_data_ptr();
-    }
-    for (int i = 0; i < cfg->numOutputs; i++) {
-      cfg->dcfg.outputs[i] =
-          args[cfg->numInputs + i]->toTensor().mutable_data_ptr();
-    }
-
-    // Transpose inputs.
-    for (int i = 0; i < cfg->numInputs; i++) {
-      if (cfg->inputTranspositionFlags[i]) {
-        if (args[i]->toTensor().sizes().size() < 3) {
+      auto arg = args[cfg->inputMap[i]]->toTensor();
+      if (cfg->inputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
+        if (arg.sizes().size() < 3) {
          ET_LOG(Error, "Unable to transpose 1D and 2D input to channel last");
          return Error::InvalidProgram;
        }
        // Allocate buffer, the allocator is reset after each PTE instruction.
-        void* buffer = context.allocate(args[i]->toTensor().nbytes(), 16);
+        void* buffer = context.allocate(arg.nbytes());
        transposeInput(
-            args[i]->toTensor().const_data_ptr(),
-            buffer,
-            args[i]->toTensor().sizes(),
-            args[i]->toTensor().element_size());
+            arg.const_data_ptr(), buffer, arg.sizes(), arg.element_size());
        cfg->dcfg.inputs[i] = buffer;
+      } else {
+        cfg->dcfg.inputs[i] = arg.const_data_ptr();
      }
    }
-    // Redirect outputs.
+
+    // Set outputs from args.
+    // Redirect outputs if needed before transposition.
    for (int i = 0; i < cfg->numOutputs; i++) {
-      if (cfg->outputTranspositionFlags[i]) {
+      auto arg = args[cfg->numInputArgs + cfg->outputMap[i]]->toTensor();
+      if (cfg->outputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
        // Allocate buffer, the allocator is reset after each PTE instruction.
-        void* buffer =
-            context.allocate(args[cfg->numInputs + i]->toTensor().nbytes(), 16);
+        void* buffer = context.allocate(arg.nbytes());
        cfg->dcfg.outputs[i] = buffer;
+      } else {
+        cfg->dcfg.outputs[i] = arg.mutable_data_ptr();
      }
    }
 
@@ -368,17 +392,19 @@ class NeutronBackend final : public PyTorchBackendInterface {
 
    // Transpose outputs.
    for (int i = 0; i < cfg->numOutputs; i++) {
-      if (cfg->outputTranspositionFlags[i]) {
-        if (args[cfg->numInputs + i]->toTensor().sizes().size() < 3) {
+      auto arg = args[cfg->numInputArgs + cfg->outputMap[i]]->toTensor();
+      if (cfg->outputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
+        if (arg.sizes().size() < 3) {
          ET_LOG(
              Error, "Unable to transpose 1D and 2D output to channel first");
          return Error::InvalidProgram;
        }
        transposeOutput(
            cfg->dcfg.outputs[i],
-            args[cfg->numInputs + i]->toTensor().mutable_data_ptr(),
-            args[cfg->numInputs + i]->toTensor().sizes(),
-            args[cfg->numInputs + i]->toTensor().element_size());
+            arg.mutable_data_ptr(),
+            arg.sizes(),
+            arg.element_size());
      }
    }
 