@@ -128,15 +128,22 @@ const float et_rtol = 0.01;
  * The temp_allocation_pool is used for allocating temporary data during kernel
  * or delegate execution. This will be reset after each kernel or delegate call.
  * Currently a MemoryAllocator is used but a PlatformMemoryAllocator is probably
- * a better fit
+ * a better fit.
+ *
+ * The Corstone-300 and Corstone-320 platforms have 2MB of SRAM, so we allocate
+ * a temporary pool that can fully utilize the memory subsystem of the platform.
+ * If your NN requires more than 2MB of SRAM for the peak intermediate tensor
+ * (Total SRAM Used in the AoT Vela summary), consider compiling your model with
+ * the --optimise Size CLI option in the Vela compile spec to lower the SRAM
+ * consumption of the model.
  */
 #if !defined(ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE)
-#define ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE (1 * 1024 * 1024)
+#define ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE (2 * 1024 * 1024)
 #endif
 const size_t temp_allocation_pool_size =
     ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE;
 unsigned char __attribute__((
-    section("input_data_sec"),
+    section(".bss.ethosu_scratch"),
     aligned(16))) temp_allocation_pool[temp_allocation_pool_size];
 
 void et_pal_init(void) {
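
The #if !defined(...) guard above means the 2 MB value is only a fallback: a build can supply its own pool size, in which case the default is ignored. The following standalone sketch is illustrative only; it assumes the macro is passed in as a compile definition (for example -DET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE=0x300000, a made-up value) and simply reports the size the runner would reserve for the pool.

// Standalone sketch, not part of the runner: shows that the 2 MB figure is
// only a default and that an externally supplied value takes precedence.
#include <cstddef>
#include <cstdio>

#if !defined(ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE)
// Fallback mirrors the patch: 2 MB, matching the Corstone-300/320 SRAM size.
#define ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE (2 * 1024 * 1024)
#endif

int main() {
  const size_t pool_bytes = ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE;
  // This is the amount the runner would place in the .bss.ethosu_scratch
  // section for the temporary allocator.
  std::printf("temp allocation pool: %zu bytes\n", pool_bytes);
  return 0;
}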
@@ -207,7 +214,7 @@ namespace {
 class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator {
  public:
   ArmMemoryAllocator(uint32_t size, uint8_t* base_address)
-      : MemoryAllocator(size, base_address), used_(0) {}
+      : MemoryAllocator(size, base_address), used_(0), peak_used_(0) {}
 
   void* allocate(size_t size, size_t alignment = kDefaultAlignment) override {
     void* ret = executorch::runtime::MemoryAllocator::allocate(size, alignment);
@@ -222,6 +229,8 @@ class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator {
       } else {
         used_ = (used_ | (alignment - 1)) + 1 + size;
       }
+      if (used_ > peak_used_)
+        peak_used_ = used_;
     }
     return ret;
   }
@@ -231,13 +240,25 @@ class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator {
     return used_;
   }
 
+  // Returns the peak memory usage of the allocator's memory buffer.
+  // Peak usage is useful when doing multiple allocations and resets.
+  size_t peak_used() const {
+    return peak_used_;
+  }
+
   // Returns the free size of the allocator's memory buffer.
   size_t free_size() const {
     return executorch::runtime::MemoryAllocator::size() - used_;
   }
 
+  void reset() {
+    executorch::runtime::MemoryAllocator::reset();
+    used_ = 0;
+  }
+
  private:
   size_t used_;
+  size_t peak_used_;
 };
 
 Result<BufferCleanup> prepare_input_tensors(
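
To make the used_ / peak_used_ bookkeeping easy to check in isolation, here is a minimal stand-in sketch. It is not the real executorch::runtime::MemoryAllocator and the class name is invented; it is a plain bump allocator that tracks usage the same way the patched ArmMemoryAllocator does, so it shows that reset() rewinds the current offset while the peak persists.

// Minimal stand-in sketch (not the real ExecuTorch allocator): a bump
// allocator that tracks current and peak usage the same way the patch does.
#include <cstddef>
#include <cstdint>
#include <cstdio>

class PeakTrackingAllocator {
 public:
  PeakTrackingAllocator(uint8_t* base, size_t size)
      : base_(base), size_(size), used_(0), peak_used_(0) {}

  void* allocate(size_t size, size_t alignment = 8) {
    // Round the current offset up to the requested alignment
    // (alignment is assumed to be a power of two).
    size_t aligned = (used_ + alignment - 1) & ~(alignment - 1);
    if (aligned + size > size_) {
      return nullptr; // Out of pool memory.
    }
    used_ = aligned + size;
    if (used_ > peak_used_) {
      peak_used_ = used_; // Remember the high-water mark across resets.
    }
    return base_ + aligned;
  }

  // reset() rewinds the bump pointer but keeps the peak, mirroring the patch.
  void reset() { used_ = 0; }

  size_t used() const { return used_; }
  size_t peak_used() const { return peak_used_; }

 private:
  uint8_t* base_;
  size_t size_;
  size_t used_;
  size_t peak_used_;
};

int main() {
  static uint8_t pool[1024];
  PeakTrackingAllocator a(pool, sizeof(pool));
  a.allocate(300);
  a.reset(); // e.g. after one kernel or delegate call
  a.allocate(100);
  std::printf("used: %zu peak: %zu\n", a.used(), a.peak_used());
  return 0;
}

Running this prints "used: 100 peak: 300", i.e. the high-water mark from before the reset is still reported afterwards, which is what the runner's final log line relies on.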
@@ -682,11 +703,11 @@ int main(int argc, const char* argv[]) {
   if (temp_allocator.size() > 0) {
     ET_LOG(
         Info,
-        "temp_allocator_used: %zu / %zu free: %zu ( used: %zu %% )",
-        temp_allocator.used_size(),
+        "peak_temp_allocator: %zu / %zu free: %zu ( used: %zu %% )",
+        temp_allocator.peak_used(),
         temp_allocator.size(),
         temp_allocator.free_size(),
-        100 * temp_allocator.used_size() / temp_allocator.size());
+        100 * temp_allocator.peak_used() / temp_allocator.size());
   }
 
   if (status != Error::Ok) {
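
The ET_LOG call above reports the used figure with integer division, so the percentage rounds down. The short sketch below reproduces the same arithmetic with plain printf instead of ExecuTorch's logging macro; the function name and the numbers are illustrative only.

// Standalone sketch: same guard and same integer arithmetic as the runner's
// usage report, using plain printf.
#include <cstddef>
#include <cstdio>

void report_temp_pool(size_t peak_used, size_t pool_size) {
  if (pool_size > 0) { // Same guard as the runner: skip the log for an empty pool.
    std::printf(
        "peak_temp_allocator: %zu / %zu (used: %zu %%)\n",
        peak_used,
        pool_size,
        100 * peak_used / pool_size);
  }
}

int main() {
  // E.g. a 1.5 MB peak out of the 2 MB pool reports "used: 75 %".
  report_temp_pool(1536 * 1024, 2 * 1024 * 1024);
  return 0;
}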