Commit 0248897

NXP backend: Make the flow robust against input/output swapping. (#12890)
### Summary
The NXP backend is now robust against swapping the order of inputs/outputs in the model converter.

Release notes: NXP

### Test plan
test_neutron_backend.py tests this feature.
1 parent 2e32bc2 commit 0248897

4 files changed: +182 -80 lines changed
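The core idea of the change: the delegate payload header now records, for each Neutron input and output, the position of the corresponding tensor in the ExportedProgram graph signature, and the runtime reads its arguments through those index maps instead of assuming positional agreement. A minimal sketch of that remapping, using hypothetical names (bind_runtime_args is not part of the ExecuTorch API):

# Minimal sketch of the remapping idea (hypothetical helper, not ExecuTorch API).
def bind_runtime_args(args, num_input_args, input_map, output_map):
    """Select runtime tensors in the order the Neutron microcode expects.

    `args` holds all inputs followed by all outputs, ordered by the graph
    signature; `input_map[i]` / `output_map[i]` give the signature position
    of the i-th Neutron input / output.
    """
    neutron_inputs = [args[m] for m in input_map]
    neutron_outputs = [args[num_input_args + m] for m in output_map]
    return neutron_inputs, neutron_outputs


# Example: the converter swapped the two inputs, so the input map is [1, 0].
ins, outs = bind_runtime_args(["x", "y", "out"], 2, [1, 0], [0])
assert ins == ["y", "x"] and outs == ["out"]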

backends/nxp/neutron_node_extraction.py

Lines changed: 43 additions & 2 deletions
@@ -6,7 +6,6 @@
 from dataclasses import dataclass
 
 import numpy as np
-
 from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
     BuiltinOperator,
 )
@@ -15,6 +14,10 @@
 
 @dataclass
 class NeutronNodeArtifacts:
+    input_names: list[str]
+    input_indices: list[int]
+    output_names: list[str]
+    output_indices: list[int]
     microcode: np.ndarray
     weights: np.ndarray
     kernels: np.ndarray
@@ -99,4 +102,42 @@ def extract_artifacts_from_neutron_node(
         microcode.dtype == weights.dtype == kernels.dtype == np.dtype("uint8")
     ), "The Neutron Node uses unexpected data types."
 
-    return NeutronNodeArtifacts(microcode, weights, kernels)
+    input_names = []
+    input_indices = []
+    graph_inputs = sub_graph.InputsAsNumpy()
+    node_inputs = neutron_node.InputsAsNumpy()[:-3]
+    for tensor_idx in node_inputs:
+        which_graph_input = np.where(graph_inputs == tensor_idx)[0]
+        assert (
+            which_graph_input.size == 1
+        ), "Mismatch between Neutron Node inputs and graph inputs."
+        input_indices.append(which_graph_input[0])
+        input_names.append(sub_graph.Tensors(graph_inputs[which_graph_input[0]]).Name())
+
+    assert (
+        neutron_node.OutputsLength() >= 2
+    ), f"The Neutron Node only has `{neutron_node.OutputsLength()}` outputs. Expected at least `2` including the scratch buffer."
+
+    output_names = []
+    output_indices = []
+    graph_outputs = sub_graph.OutputsAsNumpy()
+    node_outputs = neutron_node.OutputsAsNumpy()[:-1]
+    for tensor_idx in node_outputs:
+        which_graph_output = np.where(graph_outputs == tensor_idx)[0]
+        assert (
+            which_graph_output.size == 1
+        ), "Mismatch between Neutron Node outputs and graph outputs."
+        output_indices.append(which_graph_output[0])
+        output_names.append(
+            sub_graph.Tensors(graph_outputs[which_graph_output[0]]).Name()
+        )
+
+    return NeutronNodeArtifacts(
+        input_names,
+        input_indices,
+        output_names,
+        output_indices,
+        microcode,
+        weights,
+        kernels,
+    )
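The extraction above matches each Neutron node tensor id against the subgraph's input (and output) arrays with np.where; the matched position is the graph-signature index that gets shipped in the payload. A small self-contained illustration with toy tensor ids (the arrays stand in for sub_graph.InputsAsNumpy() and neutron_node.InputsAsNumpy()[:-3]):

import numpy as np

# Toy stand-ins for the subgraph inputs and the Neutron node inputs.
graph_inputs = np.array([7, 3, 9])  # tensor ids, in graph-signature order
node_inputs = np.array([9, 7])      # tensor ids, in the order Neutron consumes them

input_indices = []
for tensor_idx in node_inputs:
    which = np.where(graph_inputs == tensor_idx)[0]
    assert which.size == 1, "Mismatch between Neutron Node inputs and graph inputs."
    input_indices.append(int(which[0]))

# Neutron's 1st input is the graph's 3rd input, its 2nd is the graph's 1st.
print(input_indices)  # [2, 0]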

backends/nxp/nxp_backend.py

Lines changed: 49 additions & 21 deletions
@@ -245,39 +245,67 @@ def _format_string_for_array(self, array: np.ndarray) -> str:
 
         return f"{array.size}s{self._padding_format_string_for_array(array)}"
 
-    def _create_payload_header(self, io_formats) -> np.ndarray:
+    def _create_payload_header(self, io_formats, neutron_artifacts) -> np.ndarray:
         """
         Create bytes header for returned payload. It contains information about
         input and output tensor formats. Tensors are ordered based on graph signature
         of ExportedProgram. Header schema:
 
-        +----------------------------------+-----------------------------------+
-        | Input TensorFormats length (1B)  | Output TensorFormats length (1B)  |
-        +----------------------------------+-----------------------------------+
-        | 1st input tensor format (1B)     | [nth* input tensor format (1B)]   |
-        +----------------------------------+-----------------------------------+
-        | 1st output tensor format (1B)    | [nth* output tensor format (1B)]  |
-        +----------------------------------+-----------------------------------+
+        +----------------------------+-----------------------------+------------------------+
+        | Neutron inputs length (1B) | Neutron outputs length (1B) | Input args length (1B) |
+        +----------------------------+-----------+-----------------+------------------------+
+        | 1st input tensor format (1B)           | [nth* input tensor format (1B)]          |
+        +----------------------------------------+------------------------------------------+
+        | 1st output tensor format (1B)          | [nth* output tensor format (1B)]         |
+        +----------------------------------------+------------------------------------------+
+        | 1st input map (1B)                     | [nth* input map (1B)]                    |
+        +----------------------------------------+------------------------------------------+
+        | 1st output map (1B)                    | [nth* output map (1B)]                   |
+        +----------------------------------------+------------------------------------------+
 
         :param io_formats: IO tensors formats.
         :return: Bytes representation of payload header.
         """
         inputs = io_formats["inputs"]
         outputs = io_formats["outputs"]
 
-        assert len(inputs) < 256, "Models with more than 255 inputs are not supported."
         assert (
-            len(outputs) < 256
+            len(neutron_artifacts.input_indices) < 256
+        ), "Models with more than 255 inputs are not supported."
+        assert (
+            len(neutron_artifacts.output_indices) < 256
         ), "Models with more than 255 outputs are not supported."
 
-        header_data = [len(inputs)]
-        header_data.append(len(outputs))
+        header_data = [len(neutron_artifacts.input_indices)]
+        header_data.append(len(neutron_artifacts.output_indices))
+        header_data.append(len(inputs))
 
-        for _tensor, tensor_format in inputs.items():
-            header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
+        for input_name in neutron_artifacts.input_names:
+            try:
+                header_data.append(
+                    1
+                    if inputs[input_name.decode()] == TensorFormat.CHANNELS_LAST
+                    else 0
+                )
+            except KeyError:
+                raise AssertionError(
+                    f"Input tensor `{input_name.decode()}` not found in the converted model."
+                )
 
-        for _tensor, tensor_format in outputs.items():
-            header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
+        for output_name in neutron_artifacts.output_names:
+            try:
+                header_data.append(
+                    1
+                    if outputs[output_name.decode()] == TensorFormat.CHANNELS_LAST
+                    else 0
+                )
+            except KeyError:
+                raise AssertionError(
+                    f"Output tensor `{output_name.decode()}` not found in the converted model."
+                )
+
+        header_data.extend(neutron_artifacts.input_indices)
+        header_data.extend(neutron_artifacts.output_indices)
 
         # noinspection PyTypeChecker
         return np.array(header_data, dtype=np.uint8)
@@ -314,9 +342,9 @@ def get_binary_payload(self, io_formats, neutron_model) -> bytes:
 
         +----------------------------------------------------------------------------------------------------------------+
         |                                            16 bytes aligned blocks                                             |
-        +===========================+============================+===========================+===========================+
-        | Input formats length (1B) | Output formats length (1B) | [nth* input format (1B)]  | [nth* output format (1B)] |
-        +---------------------------+----------------------------+---------------------------+---------------------------+
+        +================================================================================================================+
+        |                                                     Header                                                     |
+        +----------------------------------------------------------------------------------------------------------------+
         |                                               Neutron microcode                                                |
         +----------------------------------------------------------------------------------------------------------------+
         |                                                Neutron weights                                                 |
@@ -331,9 +359,9 @@ def get_binary_payload(self, io_formats, neutron_model) -> bytes:
         :param neutron_model: Neutron model with single NeutronGraph node.
         :return: 16 bytes aligned binary payload.
         """
-        header = self._create_payload_header(io_formats)
-
         # Extract the Neutron microcode, weights and kernels from the Neutron Node in the `neutron_model`.
         neutron_artifacts = extract_artifacts_from_neutron_node(neutron_model)
 
+        header = self._create_payload_header(io_formats, neutron_artifacts)
+
         return self._pack_with_alignment(header, neutron_artifacts)
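The header built above is a flat uint8 array: three length bytes, one format byte per Neutron input and output, then the input and output index maps. A hedged sketch of a decoder for that layout (illustrative only; the authoritative parsing is the macro arithmetic in NeutronBackend.cpp):

import numpy as np

def parse_payload_header(header: np.ndarray):
    """Decode the header layout documented in _create_payload_header (sketch)."""
    num_inputs, num_outputs, num_input_args = int(header[0]), int(header[1]), int(header[2])
    pos = 3
    input_formats = header[pos:pos + num_inputs]; pos += num_inputs
    output_formats = header[pos:pos + num_outputs]; pos += num_outputs
    input_map = header[pos:pos + num_inputs]; pos += num_inputs
    output_map = header[pos:pos + num_outputs]
    return num_input_args, input_formats, output_formats, input_map, output_map

# 2 Neutron inputs, 1 output, 2 input args; second input is channels-last; inputs swapped.
hdr = np.array([2, 1, 2, 0, 1, 0, 1, 0, 0], dtype=np.uint8)
print(parse_payload_header(hdr))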

backends/nxp/runtime/NeutronBackend.cpp

Lines changed: 74 additions & 47 deletions
@@ -25,37 +25,53 @@ namespace neutron {
 #define ALIGN_SIZE(size) \
   ((size + BUFFER_ALIGNMENT - 1) & (~(BUFFER_ALIGNMENT - 1)))
 
+// clang-format off
 /* Header schema:
-  +----------------------------------+-----------------------------------+
-  | Input TensorFormats length (1B)  | Output TensorFormats length (1B)  |
-  +----------------------------------+-----------------------------------+
-  | 1st input tensor format (1B)     | [nth* input tensor format (1B)]   |
-  +----------------------------------+-----------------------------------+
-  | 1st output tensor format (1B)    | [nth* output tensor format (1B)]  |
-  +----------------------------------+-----------------------------------+
+  +----------------------------+-----------------------------+------------------------+
+  | Neutron inputs length (1B) | Neutron outputs length (1B) | Input args length (1B) |
+  +----------------------------+-----------+-----------------+------------------------+
+  | 1st input tensor format (1B)           | [nth* input tensor format (1B)]          |
+  +----------------------------------------+------------------------------------------+
+  | 1st output tensor format (1B)          | [nth* output tensor format (1B)]         |
+  +----------------------------------------+------------------------------------------+
+  | 1st input map (1B)                     | [nth* input map (1B)]                    |
+  +----------------------------------------+------------------------------------------+
+  | 1st output map (1B)                    | [nth* output map (1B)]                   |
+  +----------------------------------------+------------------------------------------+
 */
+// clang-format on
 #define ITEM_SIZE 1 // 1 Byte
 #define INPUT_TENSOR_FORMAT_LEN_POS 0
 #define OUTPUT_TENSOR_FORMAT_LEN_POS 1
-#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 2 * ITEM_SIZE)
+#define INPUT_ARGS_LEN_POS 2
+#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 3 * ITEM_SIZE)
 #define OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(base) \
-  (base + 2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
-#define PAYLOAD_ADDR(base) \
-  (base +                  \
-   ALIGN_SIZE(             \
-       2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS] + \
-       base[OUTPUT_TENSOR_FORMAT_LEN_POS]))
+  (base + 3 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
+#define INPUT_TENSOR_MAP_ARRAY_ADDR(base)                         \
+  (base + 3 * ITEM_SIZE + 1 * base[INPUT_TENSOR_FORMAT_LEN_POS] + \
+   1 * base[OUTPUT_TENSOR_FORMAT_LEN_POS])
+#define OUTPUT_TENSOR_MAP_ARRAY_ADDR(base)                        \
+  (base + 3 * ITEM_SIZE + 2 * base[INPUT_TENSOR_FORMAT_LEN_POS] + \
+   1 * base[OUTPUT_TENSOR_FORMAT_LEN_POS])
+#define PAYLOAD_ADDR(base)                                          \
+  (base +                                                           \
+   ALIGN_SIZE(                                                      \
+       3 * ITEM_SIZE + 2 * base[INPUT_TENSOR_FORMAT_LEN_POS] +      \
+       2 * base[OUTPUT_TENSOR_FORMAT_LEN_POS]))
 
 // Aggregate neutron model handle and data structures into one.
 typedef struct {
   int numInputs = 0;
   int numOutputs = 0;
+  int numInputArgs = 0;
   uint32_t scratchSize = 0;
   NeutronModelConfig mcfg;
   NeutronDataConfig dcfg;
   NeutronModelHandle nmh = NULL;
   const uint8_t* inputTranspositionFlags;
   const uint8_t* outputTranspositionFlags;
+  const uint8_t* inputMap;
+  const uint8_t* outputMap;
 } NeutronConfig;
 
 // Applied on outputs.
@@ -210,6 +226,15 @@ void transposeOutput(
   }
 }
 
+bool multipleChannelsPresent(const ArrayRef<exec_aten::SizesType>& sizes) {
+  size_t length = sizes.size();
+  if (length < 3) {
+    return true;
+  }
+  size_t C = sizes[length - 3];
+  return C != 1;
+}
+
 class NeutronBackend final : public PyTorchBackendInterface {
  public:
   NeutronBackend() {}
@@ -234,17 +259,19 @@ class NeutronBackend final : public PyTorchBackendInterface {
     // cfg->mcfg.microcode
     // cfg->mcfg.weights
    // cfg->mcfg.kernels
-    const uint8_t* transpositionFlags =
+    const uint8_t* payloadFlags =
        static_cast<const uint8_t*>(processed->data());
-    int numInputs = transpositionFlags[INPUT_TENSOR_FORMAT_LEN_POS];
-    int numOutputs = transpositionFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
-    cfg->inputTranspositionFlags =
-        INPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
+    uint32_t numInputs = payloadFlags[INPUT_TENSOR_FORMAT_LEN_POS];
+    uint32_t numOutputs = payloadFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
+    cfg->numInputArgs = payloadFlags[INPUT_ARGS_LEN_POS];
+    cfg->inputTranspositionFlags = INPUT_TENSOR_FORMAT_ARRAY_ADDR(payloadFlags);
     cfg->outputTranspositionFlags =
-        OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
+        OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(payloadFlags);
+    cfg->inputMap = INPUT_TENSOR_MAP_ARRAY_ADDR(payloadFlags);
+    cfg->outputMap = OUTPUT_TENSOR_MAP_ARRAY_ADDR(payloadFlags);
 
     const uint32_t* buffer = static_cast<const uint32_t*>(
-        static_cast<const void*> PAYLOAD_ADDR(transpositionFlags));
+        static_cast<const void*> PAYLOAD_ADDR(payloadFlags));
     uint32_t magicWord = buffer[0];
     // Check valid microcode.
     if (magicWord != 0x64434D6E) {
@@ -314,39 +341,37 @@ class NeutronBackend final : public PyTorchBackendInterface {
     cfg->dcfg.outputs[cfg->numOutputs] =
         static_cast<void*>(context.allocate(cfg->scratchSize, 16));
 
-    // Set inputs and outputs from args.
+    // Set inputs from args.
+    // Transpose inputs if needed.
     for (int i = 0; i < cfg->numInputs; i++) {
-      cfg->dcfg.inputs[i] = args[i]->toTensor().const_data_ptr();
-    }
-    for (int i = 0; i < cfg->numOutputs; i++) {
-      cfg->dcfg.outputs[i] =
-          args[cfg->numInputs + i]->toTensor().mutable_data_ptr();
-    }
-
-    // Transpose inputs.
-    for (int i = 0; i < cfg->numInputs; i++) {
-      if (cfg->inputTranspositionFlags[i]) {
-        if (args[i]->toTensor().sizes().size() < 3) {
+      auto arg = args[cfg->inputMap[i]]->toTensor();
+      if (cfg->inputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
+        if (arg.sizes().size() < 3) {
          ET_LOG(Error, "Unable to transpose 1D and 2D input to channel last");
          return Error::InvalidProgram;
        }
        // Allocate buffer, the allocator is reset after each PTE instruction.
-        void* buffer = context.allocate(args[i]->toTensor().nbytes(), 16);
+        void* buffer = context.allocate(arg.nbytes());
        transposeInput(
-            args[i]->toTensor().const_data_ptr(),
-            buffer,
-            args[i]->toTensor().sizes(),
-            args[i]->toTensor().element_size());
+            arg.const_data_ptr(), buffer, arg.sizes(), arg.element_size());
        cfg->dcfg.inputs[i] = buffer;
+      } else {
+        cfg->dcfg.inputs[i] = arg.const_data_ptr();
      }
    }
-    // Redirect outputs.
+
+    // Set outputs from args.
+    // Redirect outputs if needed before transposition.
    for (int i = 0; i < cfg->numOutputs; i++) {
-      if (cfg->outputTranspositionFlags[i]) {
+      auto arg = args[cfg->numInputArgs + cfg->outputMap[i]]->toTensor();
+      if (cfg->outputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
        // Allocate buffer, the allocator is reset after each PTE instruction.
-        void* buffer =
-            context.allocate(args[cfg->numInputs + i]->toTensor().nbytes(), 16);
+        void* buffer = context.allocate(arg.nbytes());
        cfg->dcfg.outputs[i] = buffer;
+      } else {
+        cfg->dcfg.outputs[i] = arg.mutable_data_ptr();
      }
    }
 
@@ -368,17 +393,19 @@ class NeutronBackend final : public PyTorchBackendInterface {
 
    // Transpose outputs.
    for (int i = 0; i < cfg->numOutputs; i++) {
-      if (cfg->outputTranspositionFlags[i]) {
-        if (args[cfg->numInputs + i]->toTensor().sizes().size() < 3) {
+      auto arg = args[cfg->numInputArgs + cfg->outputMap[i]]->toTensor();
+      if (cfg->outputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
+        if (arg.sizes().size() < 3) {
          ET_LOG(
              Error, "Unable to transpose 1D and 2D output to channel first");
          return Error::InvalidProgram;
        }
        transposeOutput(
            cfg->dcfg.outputs[i],
-            args[cfg->numInputs + i]->toTensor().mutable_data_ptr(),
-            args[cfg->numInputs + i]->toTensor().sizes(),
-            args[cfg->numInputs + i]->toTensor().element_size());
+            arg.mutable_data_ptr(),
+            arg.sizes(),
+            arg.element_size());
      }
    }
 
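The *_ADDR macros above walk the same layout from the runtime side: three fixed length bytes, two per-tensor format arrays, two per-tensor map arrays, then a payload start rounded up to the buffer alignment. The same arithmetic written out as a plain sketch (Python; the alignment value of 16 is an assumption matching the 16-byte aligned payload blocks):

ITEM_SIZE = 1
BUFFER_ALIGNMENT = 16  # assumed; must match the runtime's BUFFER_ALIGNMENT

def align(size, alignment=BUFFER_ALIGNMENT):
    # Round `size` up to the next multiple of `alignment`.
    return (size + alignment - 1) & ~(alignment - 1)

def header_offsets(num_inputs, num_outputs):
    """Byte offsets mirroring the *_ADDR macros in NeutronBackend.cpp (sketch)."""
    input_formats = 3 * ITEM_SIZE
    output_formats = input_formats + num_inputs
    input_map = output_formats + num_outputs
    output_map = input_map + num_inputs
    payload = align(3 * ITEM_SIZE + 2 * num_inputs + 2 * num_outputs)
    return input_formats, output_formats, input_map, output_map, payload

print(header_offsets(2, 1))  # (3, 5, 6, 8, 16)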
