Skip to content

Commit 299fc73

Browse files
jirioc and StrycekSimon
authored and committed
NXP backend: Make the flow robust against input/output swapping.
1 parent de0554d commit 299fc73

File tree

4 files changed

+201
-96
lines changed

4 files changed

+201
-96
lines changed

backends/nxp/neutron_node_extraction.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from dataclasses import dataclass
77

88
import numpy as np
9-
109
from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
1110
BuiltinOperator,
1211
)
@@ -15,6 +14,10 @@
1514

1615
@dataclass
1716
class NeutronNodeArtifacts:
17+
input_names: list[str]
18+
input_indices: list[int]
19+
output_names: list[str]
20+
output_indices: list[int]
1821
microcode: np.ndarray
1922
weights: np.ndarray
2023
kernels: np.ndarray
@@ -99,4 +102,42 @@ def extract_artifacts_from_neutron_node(
99102
microcode.dtype == weights.dtype == kernels.dtype == np.dtype("uint8")
100103
), "The Neutron Node uses unexpected data types."
101104

102-
return NeutronNodeArtifacts(microcode, weights, kernels)
105+
input_names = []
106+
input_indices = []
107+
graph_inputs = sub_graph.InputsAsNumpy()
108+
node_inputs = neutron_node.InputsAsNumpy()[:-3]
109+
for tensor_idx in node_inputs:
110+
which_graph_input = np.where(graph_inputs == tensor_idx)[0]
111+
assert (
112+
which_graph_input.size == 1
113+
), "Mismatch between Neutron Node inputs and graph inputs."
114+
input_indices.append(which_graph_input[0])
115+
input_names.append(sub_graph.Tensors(graph_inputs[which_graph_input[0]]).Name())
116+
117+
assert (
118+
neutron_node.OutputsLength() >= 2
119+
), f"The Neutron Node only has `{neutron_node.GetOutputsLen()}` outputs. Expected at least `2` including the scratch buffer."
120+
121+
output_names = []
122+
output_indices = []
123+
graph_outputs = sub_graph.OutputsAsNumpy()
124+
node_outputs = neutron_node.OutputsAsNumpy()[:-1]
125+
for tensor_idx in node_outputs:
126+
which_graph_output = np.where(graph_outputs == tensor_idx)[0]
127+
assert (
128+
which_graph_output.size == 1
129+
), "Mismatch between Neutron Node outputs and graph outputs."
130+
output_indices.append(which_graph_output[0])
131+
output_names.append(
132+
sub_graph.Tensors(graph_outputs[which_graph_output[0]]).Name()
133+
)
134+
135+
return NeutronNodeArtifacts(
136+
input_names,
137+
input_indices,
138+
output_names,
139+
output_indices,
140+
microcode,
141+
weights,
142+
kernels,
143+
)

backends/nxp/nxp_backend.py

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -245,39 +245,67 @@ def _format_string_for_array(self, array: np.ndarray) -> str:
245245

246246
return f"{array.size}s{self._padding_format_string_for_array(array)}"
247247

248-
def _create_payload_header(self, io_formats) -> np.ndarray:
248+
def _create_payload_header(self, io_formats, neutron_artifacts) -> np.ndarray:
249249
"""
250250
Create bytes header for returned payload. It contains information about
251251
input and output tensor formats. Tensors are ordered based on graph signature
252252
of ExportedProgram. Header schema:
253253
254-
+----------------------------------+-----------------------------------+
255-
| Input TensorFormats length (1B) | Output TensorFormats length (1B) |
256-
+----------------------------------+-----------------------------------+
257-
| 1st input tensor format (1B) | [nth* input tensor format (1B)] |
258-
+----------------------------------+-----------------------------------+
259-
| 1st output tensor format (1B) | [nth* output tensor format (1B)] |
260-
+----------------------------------+-----------------------------------+
254+
+----------------------------+-----------------------------+------------------------+
255+
| Neutron inputs length (1B) | Neutron outputs length (1B) | Input args length (1B) |
256+
+----------------------------+-----------+-----------------+------------------------+
257+
| 1st input tensor format (1B) | [nth* input tensor format (1B)] |
258+
+----------------------------------------+------------------------------------------+
259+
| 1st output tensor format (1B) | [nth* output tensor format (1B)] |
260+
+----------------------------------------+------------------------------------------+
261+
| 1st input map (1B) | [nth* input map (1B)] |
262+
+----------------------------------------+------------------------------------------+
263+
| 1st output map (1B) | [nth* output map (1B)] |
264+
+----------------------------------------+------------------------------------------+
261265
262266
:param io_formats: IO tensors formats.
263267
:return: Bytes representation of payload header.
264268
"""
265269
inputs = io_formats["inputs"]
266270
outputs = io_formats["outputs"]
267271

268-
assert len(inputs) < 256, "Models with more than 255 inputs are not supported."
269272
assert (
270-
len(outputs) < 256
273+
len(neutron_artifacts.input_indices) < 256
274+
), "Models with more than 255 inputs are not supported."
275+
assert (
276+
len(neutron_artifacts.output_indices) < 256
271277
), "Models with more than 255 outputs are not supported."
272278

273-
header_data = [len(inputs)]
274-
header_data.append(len(outputs))
279+
header_data = [len(neutron_artifacts.input_indices)]
280+
header_data.append(len(neutron_artifacts.output_indices))
281+
header_data.append(len(inputs))
275282

276-
for _tensor, tensor_format in inputs.items():
277-
header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
283+
for input_name in neutron_artifacts.input_names:
284+
try:
285+
header_data.append(
286+
1
287+
if inputs[input_name.decode()] == TensorFormat.CHANNELS_LAST
288+
else 0
289+
)
290+
except KeyError:
291+
raise AssertionError(
292+
f"Input tensor `{input_name.decode()}` not found in the converted model."
293+
)
278294

279-
for _tensor, tensor_format in outputs.items():
280-
header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
295+
for output_name in neutron_artifacts.output_names:
296+
try:
297+
header_data.append(
298+
1
299+
if outputs[output_name.decode()] == TensorFormat.CHANNELS_LAST
300+
else 0
301+
)
302+
except KeyError:
303+
raise AssertionError(
304+
f"Output tensor `{output_name.decode()}` not found in the converted model."
305+
)
306+
307+
header_data.extend(neutron_artifacts.input_indices)
308+
header_data.extend(neutron_artifacts.output_indices)
281309

282310
# noinspection PyTypeChecker
283311
return np.array(header_data, dtype=np.uint8)
@@ -314,9 +342,9 @@ def get_binary_payload(self, io_formats, neutron_model) -> bytes:
314342
315343
+----------------------------------------------------------------------------------------------------------------+
316344
| 16 bytes aligned blocks |
317-
+===========================+===========================+============================+===========================+
318-
| Input formats length (1B) | Output formats length (1B) | [nth* input format (1B)] | [nth* output format (1B)] |
319-
+---------------------------+--------------------------- +---------------------------+---------------------------+
345+
+================================================================================================================+
346+
| Header |
347+
+----------------------------------------------------------------------------------------------------------------+
320348
| Neutron microcode |
321349
+----------------------------------------------------------------------------------------------------------------+
322350
| Neutron weights |
@@ -331,9 +359,9 @@ def get_binary_payload(self, io_formats, neutron_model) -> bytes:
331359
:param neutron_model: Neutron model with single NeutronGraph node.
332360
:return: 16 bytes aligned binary payload.
333361
"""
334-
header = self._create_payload_header(io_formats)
335-
336362
# Extract the Neutron microcode, weights and kernels from the Neutron Node in the `neutron_model`.
337363
neutron_artifacts = extract_artifacts_from_neutron_node(neutron_model)
338364

365+
header = self._create_payload_header(io_formats, neutron_artifacts)
366+
339367
return self._pack_with_alignment(header, neutron_artifacts)

backends/nxp/runtime/NeutronBackend.cpp

Lines changed: 72 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -26,36 +26,40 @@ namespace neutron {
2626
((size + BUFFER_ALIGNMENT - 1) & (~(BUFFER_ALIGNMENT - 1)))
2727

2828
/* Header schema:
29-
+----------------------------------+-----------------------------------+
30-
| Input TensorFormats length (1B) | Output TensorFormats length (1B) |
31-
+----------------------------------+-----------------------------------+
32-
| 1st input tensor format (1B) | [nth* input tensor format (1B)] |
33-
+----------------------------------+-----------------------------------+
34-
| 1st output tensor format (1B) | [nth* output tensor format (1B)] |
35-
+----------------------------------+-----------------------------------+
29+
+----------------------------+-----------------------------+------------------------+
30+
| Neutron inputs length (1B) | Neutron outputs length (1B) | Input args length (1B) |
31+
+----------------------------+-----------+-----------------+------------------------+
32+
| 1st input tensor format (1B) | [nth* input tensor format (1B)] |
33+
+----------------------------------------+------------------------------------------+
34+
| 1st output tensor format (1B) | [nth* output tensor format (1B)] |
35+
+----------------------------------------+------------------------------------------+
36+
| 1st input map (1B) | [nth* input map (1B)] |
37+
+----------------------------------------+------------------------------------------+
38+
| 1st output map (1B) | [nth* output map (1B)] |
39+
+----------------------------------------+------------------------------------------+
3640
*/
3741
#define ITEM_SIZE 1 // 1 Byte
3842
#define INPUT_TENSOR_FORMAT_LEN_POS 0
3943
#define OUTPUT_TENSOR_FORMAT_LEN_POS 1
40-
#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 2 * ITEM_SIZE)
41-
#define OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(base) \
42-
(base + 2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
43-
#define PAYLOAD_ADDR(base) \
44-
(base + \
45-
ALIGN_SIZE( \
46-
2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS] + \
47-
base[OUTPUT_TENSOR_FORMAT_LEN_POS]))
44+
#define INPUT_ARGS_LEN_POS 2
45+
#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 3 * ITEM_SIZE)
46+
#define OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 3 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
47+
#define INPUT_TENSOR_MAP_ARRAY_ADDR(base) (base + 3 * ITEM_SIZE + 1 * base[INPUT_TENSOR_FORMAT_LEN_POS] + 1 * base[OUTPUT_TENSOR_FORMAT_LEN_POS])
48+
#define OUTPUT_TENSOR_MAP_ARRAY_ADDR(base) (base + 3 * ITEM_SIZE + 2 * base[INPUT_TENSOR_FORMAT_LEN_POS] + 1 * base[OUTPUT_TENSOR_FORMAT_LEN_POS])
49+
#define PAYLOAD_ADDR(base) (base + ALIGN_SIZE(3 * ITEM_SIZE + 2 * base[INPUT_TENSOR_FORMAT_LEN_POS] + 2 * base[OUTPUT_TENSOR_FORMAT_LEN_POS]))
4850

4951
// Aggregate neutron model handle and data structures into one.
5052
typedef struct {
51-
int numInputs = 0;
52-
int numOutputs = 0;
53-
uint32_t scratchSize = 0;
54-
NeutronModelConfig mcfg;
55-
NeutronDataConfig dcfg;
56-
NeutronModelHandle nmh = NULL;
57-
const uint8_t* inputTranspositionFlags;
58-
const uint8_t* outputTranspositionFlags;
53+
int numInputs = 0;
54+
int numOutputs = 0;
55+
int numInputArgs = 0;
56+
NeutronModelConfig mcfg;
57+
NeutronDataConfig dcfg;
58+
NeutronModelHandle nmh = NULL;
59+
const uint8_t* inputTranspositionFlags;
60+
const uint8_t* outputTranspositionFlags;
61+
const uint8_t* inputMap;
62+
const uint8_t* outputMap;
5963
} NeutronConfig;
6064

6165
// Applied on outputs.
@@ -210,6 +214,15 @@ void transposeOutput(
210214
}
211215
}
212216

217+
bool multipleChannelsPresent(const ArrayRef<exec_aten::SizesType>& sizes) {
218+
size_t length = sizes.size();
219+
if (length < 3) {
220+
return true;
221+
}
222+
size_t C = sizes[length - 3];
223+
return C != 1;
224+
}
225+
213226
class NeutronBackend final : public PyTorchBackendInterface {
214227
public:
215228
NeutronBackend() {}
@@ -234,17 +247,17 @@ class NeutronBackend final : public PyTorchBackendInterface {
234247
// cfg->mcfg.microcode
235248
// cfg->mcfg.weights
236249
// cfg->mcfg.kernels
237-
const uint8_t* transpositionFlags =
238-
static_cast<const uint8_t*>(processed->data());
239-
int numInputs = transpositionFlags[INPUT_TENSOR_FORMAT_LEN_POS];
240-
int numOutputs = transpositionFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
241-
cfg->inputTranspositionFlags =
242-
INPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
243-
cfg->outputTranspositionFlags =
244-
OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
250+
const uint8_t* payloadFlags = static_cast<const uint8_t*>(processed->data());
251+
uint32_t numInputs = payloadFlags[INPUT_TENSOR_FORMAT_LEN_POS];
252+
uint32_t numOutputs = payloadFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
253+
cfg->numInputArgs = payloadFlags[INPUT_ARGS_LEN_POS];
254+
cfg->inputTranspositionFlags = INPUT_TENSOR_FORMAT_ARRAY_ADDR(payloadFlags);
255+
cfg->outputTranspositionFlags = OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(payloadFlags);
256+
cfg->inputMap = INPUT_TENSOR_MAP_ARRAY_ADDR(payloadFlags);
257+
cfg->outputMap = OUTPUT_TENSOR_MAP_ARRAY_ADDR(payloadFlags);
245258

246259
const uint32_t* buffer = static_cast<const uint32_t*>(
247-
static_cast<const void*> PAYLOAD_ADDR(transpositionFlags));
260+
static_cast<const void*>PAYLOAD_ADDR(payloadFlags));
248261
uint32_t magicWord = buffer[0];
249262
// Check valid microcode.
250263
if (magicWord != 0x64434D6E) {
@@ -314,39 +327,38 @@ class NeutronBackend final : public PyTorchBackendInterface {
314327
cfg->dcfg.outputs[cfg->numOutputs] =
315328
static_cast<void*>(context.allocate(cfg->scratchSize, 16));
316329

317-
// Set inputs and outputs from args.
330+
// Set inputs from args.
331+
// Transpose inputs if needed.
318332
for (int i = 0; i < cfg->numInputs; i++) {
319-
cfg->dcfg.inputs[i] = args[i]->toTensor().const_data_ptr();
320-
}
321-
for (int i = 0; i < cfg->numOutputs; i++) {
322-
cfg->dcfg.outputs[i] =
323-
args[cfg->numInputs + i]->toTensor().mutable_data_ptr();
324-
}
325-
326-
// Transpose inputs.
327-
for (int i = 0; i < cfg->numInputs; i++) {
328-
if (cfg->inputTranspositionFlags[i]) {
329-
if (args[i]->toTensor().sizes().size() < 3) {
333+
auto arg = args[cfg->inputMap[i]]->toTensor();
334+
if (cfg->inputTranspositionFlags[i] && multipleChannelsPresent(arg.sizes())) {
335+
if (arg.sizes().size() < 3) {
330336
ET_LOG(Error, "Unable to transpose 1D and 2D input to channel last");
331337
return Error::InvalidProgram;
332338
}
333339
// Allocate buffer, the allocator is reset after each PTE instruction.
334-
void* buffer = context.allocate(args[i]->toTensor().nbytes(), 16);
340+
void* buffer = context.allocate(arg.nbytes());
335341
transposeInput(
336-
args[i]->toTensor().const_data_ptr(),
342+
arg.const_data_ptr(),
337343
buffer,
338-
args[i]->toTensor().sizes(),
339-
args[i]->toTensor().element_size());
344+
arg.sizes(),
345+
arg.element_size());
340346
cfg->dcfg.inputs[i] = buffer;
347+
} else {
348+
cfg->dcfg.inputs[i] = arg.const_data_ptr();
341349
}
342350
}
343-
// Redirect outputs.
351+
352+
// Set outputs from args.
353+
// Redirect outputs if needed before transposition.
344354
for (int i = 0; i < cfg->numOutputs; i++) {
345-
if (cfg->outputTranspositionFlags[i]) {
355+
auto arg = args[cfg->numInputArgs + cfg->outputMap[i]]->toTensor();
356+
if (cfg->outputTranspositionFlags[i] && multipleChannelsPresent(arg.sizes())) {
346357
// Allocate buffer, the allocator is reset after each PTE instruction.
347-
void* buffer =
348-
context.allocate(args[cfg->numInputs + i]->toTensor().nbytes(), 16);
358+
void* buffer = context.allocate(arg.nbytes());
349359
cfg->dcfg.outputs[i] = buffer;
360+
} else {
361+
cfg->dcfg.outputs[i] = arg.mutable_data_ptr();
350362
}
351363
}
352364

@@ -368,17 +380,16 @@ class NeutronBackend final : public PyTorchBackendInterface {
368380

369381
// Transpose outputs.
370382
for (int i = 0; i < cfg->numOutputs; i++) {
371-
if (cfg->outputTranspositionFlags[i]) {
372-
if (args[cfg->numInputs + i]->toTensor().sizes().size() < 3) {
373-
ET_LOG(
374-
Error, "Unable to transpose 1D and 2D output to channel first");
383+
auto arg = args[cfg->numInputArgs + cfg->outputMap[i]]->toTensor();
384+
if (cfg->outputTranspositionFlags[i] && multipleChannelsPresent(arg.sizes())) {
385+
if (arg.sizes().size() < 3) {
386+
ET_LOG(Error, "Unable to transpose 1D and 2D output to channel first");
375387
return Error::InvalidProgram;
376388
}
377-
transposeOutput(
378-
cfg->dcfg.outputs[i],
379-
args[cfg->numInputs + i]->toTensor().mutable_data_ptr(),
380-
args[cfg->numInputs + i]->toTensor().sizes(),
381-
args[cfg->numInputs + i]->toTensor().element_size());
389+
transposeOutput(cfg->dcfg.outputs[i],
390+
arg.mutable_data_ptr(),
391+
arg.sizes(),
392+
arg.element_size());
382393
}
383394
}
384395

0 commit comments

Comments
 (0)