samples: tflite_ethosu: support numaker_m55m1

ccli8 · jhedberg · commit ace13711466c · 2025-10-02T15:15:49.000+03:00
This adds support for the Nuvoton numaker_m55m1 board.

In addition, reorganize the sample for the new target and Ethos-U support:
1. Keep only one model, the one originally provided for the MPS3 target
2. Add a guide for regenerating the Vela-compiled model for targets that
   support Ethos-U
3. Support running a non-Vela-compiled model on targets that do not
   support Ethos-U

Signed-off-by: Chun-Chieh Li <ccli8@nuvoton.com>
diff --git a/samples/modules/tflite-micro/tflm_ethosu/CMakeLists.txt b/samples/modules/tflite-micro/tflm_ethosu/CMakeLists.txt
@@ -8,8 +8,11 @@ find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
 
 project(tflm_ethosu_app)
 
+# Choose model
 target_include_directories(app PRIVATE src/models/keyword_spotting_cnn_small_int8)
 
 target_sources(app PRIVATE src/main.cpp src/inference_process.cpp)
 
-zephyr_linker_sources(SECTIONS linker.ld)
+# Add platform specific linker source snippet
+zephyr_linker_sources_ifdef(CONFIG_SOC_SERIES_MPS3 SECTIONS linker.ld)
+zephyr_linker_sources_ifdef(CONFIG_SOC_SERIES_MPS4 SECTIONS linker.ld)
diff --git a/samples/modules/tflite-micro/tflm_ethosu/Kconfig b/samples/modules/tflite-micro/tflm_ethosu/Kconfig
@@ -1,9 +1,15 @@
 # Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
 # SPDX-License-Identifier: Apache-2.0
 
-config TFLM_ETHOSU_TAINT_BLOBS
+config TAINT_BLOBS_TFLM
 	bool
 	default y
 	select TAINT_BLOBS
 
+config TAINT_BLOBS_TFLM_ETHOSU
+	bool "Choose Vela-compiled model targeting Ethos-U"
+	default y
+	depends on ETHOS_U_ARM || ETHOS_U_NUMAKER
+	select TAINT_BLOBS_TFLM
+
 source "Kconfig.zephyr"
diff --git a/samples/modules/tflite-micro/tflm_ethosu/README.rst b/samples/modules/tflite-micro/tflm_ethosu/README.rst
@@ -19,6 +19,67 @@ where the operators supported by Ethos-U have been replaced by an Ethos-U custom
 operator. In an ideal case the complete network would be replaced by a single
 Ethos-U custom operator.
 
+Generating Vela-compiled model
+******************************
+
+Follow the steps below to generate the Vela-compiled model and the test input/output data.
+This sample uses the `keyword_spotting_cnn_small_int8`_ model:
+
+.. _keyword_spotting_cnn_small_int8: https://github.com/Arm-Examples/ML-zoo/tree/master/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8
+
+.. note:: The default Vela-compiled model targets Ethos-U55 with 128 MACs
+   on the MPS3 target. Because one model can add up to hundreds of KB, don't
+   attempt to add more models into the code base for other targets.
+
+1. Downloading the files below from `keyword_spotting_cnn_small_int8`_:
+
+   - cnn_s_quantized.tflite
+   - testing_input/input/0.npy
+   - testing_output/identity/0.npy
+
+2. Optimizing the model for Ethos-U using Vela
+
+   Assuming target Ethos-U is U55 and 128 MAC:
+
+   .. code-block:: console
+
+       $ vela cnn_s_quantized.tflite \
+       --output-dir . \
+       --accelerator-config ethos-u55-128 \
+       --system-config Ethos_U55_High_End_Embedded \
+       --memory-mode Shared_Sram
+
+3. Removing unnecessary header
+
+   ``testing_input/input/0.npy`` and ``testing_output/identity/0.npy`` have a 128-byte header.
+   These headers must be removed for integration with this sample.
+
+   .. code-block:: console
+
+       $ dd if=testing_input/input/0.npy of=testing_input/input/0_no-header.npy bs=1 skip=128
+       $ dd if=testing_output/identity/0.npy of=testing_output/identity/0_no-header.npy bs=1 skip=128
+
+4. Converting to C array
+
+   .. code-block:: console
+
+       $ xxd -c 16 -i cnn_s_quantized.tflite cnn_s_quantized.tflite.h
+       $ xxd -c 16 -i cnn_s_quantized_vela.tflite cnn_s_quantized_vela.tflite.h
+       $ xxd -c 16 -i testing_input/input/0_no-header.npy testing_input/input/0_no-header.npy.h
+       $ xxd -c 16 -i testing_output/identity/0_no-header.npy testing_output/identity/0_no-header.npy.h
+
+5. Synchronizing to this sample
+
+   Synchronize the files below to ``keyword_spotting_cnn_small_int8`` directory
+   in this sample:
+
+   - cnn_s_quantized_vela.tflite.h > model.h
+   - testing_input/input/0_no-header.npy.h > input.h
+   - testing_output/identity/0_no-header.npy.h > output.h
+
+   .. note:: To run the non-Vela-compiled model (``CONFIG_TAINT_BLOBS_TFLM_ETHOSU=n``),
+      synchronize ``cnn_s_quantized.tflite.h`` to ``model.h`` instead.
+
 Building and running
 ********************
 
diff --git a/samples/modules/tflite-micro/tflm_ethosu/boards/numaker_m55m1.overlay b/samples/modules/tflite-micro/tflm_ethosu/boards/numaker_m55m1.overlay
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+&npu0 {
+	status = "okay";
+};
diff --git a/samples/modules/tflite-micro/tflm_ethosu/src/inference_process.cpp b/samples/modules/tflite-micro/tflm_ethosu/src/inference_process.cpp
@@ -118,9 +118,16 @@ bool InferenceProcess::runJob(InferenceJob &job)
 	}
 
 	/* Create the TFL micro interpreter */
+#ifdef CONFIG_TAINT_BLOBS_TFLM_ETHOSU
 	tflite::MicroMutableOpResolver <1> resolver;
 	resolver.AddEthosU();
-
+#else
+	tflite::MicroMutableOpResolver <4> resolver;
+	resolver.AddReshape();
+	resolver.AddConv2D();
+	resolver.AddFullyConnected();
+	resolver.AddSoftmax();
+#endif
 	tflite::MicroInterpreter interpreter(model, resolver, tensorArena, tensorArenaSize);
 
 	/* Allocate tensors */
diff --git a/samples/modules/tflite-micro/tflm_ethosu/src/main.cpp b/samples/modules/tflite-micro/tflm_ethosu/src/main.cpp
@@ -98,7 +98,7 @@ volatile int totalCompletedJobs = 0;
 /* TensorArena static initialisation */
 const size_t arenaSize = TENSOR_ARENA_SIZE_PER_INFERENCE;
 
-__attribute__((section("tflm_arena"), aligned(16)))
+TENSOR_ARENA_ATTR
 uint8_t inferenceProcessTensorArena[NUM_INFERENCE_TASKS][arenaSize];
 
 /* Allocate and initialize heap */
diff --git a/samples/modules/tflite-micro/tflm_ethosu/src/models/keyword_spotting_cnn_small_int8/input.h b/samples/modules/tflite-micro/tflm_ethosu/src/models/keyword_spotting_cnn_small_int8/input.h
@@ -4,7 +4,14 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-__aligned(4) __attribute__((section("tflm_input"))) uint8_t inputData[] = {
+#if (defined(CONFIG_SOC_SERIES_MPS3) || defined(CONFIG_SOC_SERIES_MPS4)) &&                        \
+	DT_NODE_HAS_STATUS(DT_NODELABEL(ddr4), okay)
+#define INPUT_ATTR __aligned(4) __attribute__((section("tflm_input")))
+#else
+#define INPUT_ATTR __aligned(4) __attribute__((section(".rodata.tflm_input")))
+#endif
+
+INPUT_ATTR uint8_t inputData[] = {
 	0x2c, 0x8a, 0xff, 0x0c, 0xaf, 0x2a, 0x44, 0x17, 0xf5, 0x26, 0x96, 0x37, 0x40, 0x4c, 0xa1,
 	0x58, 0xc3, 0x33, 0xce, 0x1a, 0x7b, 0xd2, 0x22, 0x5b, 0x43, 0xf6, 0xfd, 0x0b, 0xe7, 0xfd,
 	0x65, 0x58, 0x89, 0x24, 0xf4, 0xec, 0x53, 0x5e, 0x21, 0x1f, 0x95, 0xd1, 0xd9, 0x25, 0x72,
diff --git a/samples/modules/tflite-micro/tflm_ethosu/src/models/keyword_spotting_cnn_small_int8/model.h b/samples/modules/tflite-micro/tflm_ethosu/src/models/keyword_spotting_cnn_small_int8/model.h
diff --git a/samples/modules/tflite-micro/tflm_ethosu/src/models/keyword_spotting_cnn_small_int8/output.h b/samples/modules/tflite-micro/tflm_ethosu/src/models/keyword_spotting_cnn_small_int8/output.h

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +/* SPDX-License-Identifier: Apache-2.0 */
++
 +&npu0 {
 +	status = "okay";
 +};