Skip to content

Commit a9ad3c6

Browse files
zingo authored and facebook-github-bot committed
Arm backend: Improve memory config and documentation in the runtime (pytorch#5580)
Summary: Pull Request resolved: pytorch#5580 Reviewed By: digantdesai Differential Revision: D63637392 Pulled By: mergennachin fbshipit-source-id: 527414da91f072456cc49eb20cd68d12dde9e400
1 parent 06ce226 commit a9ad3c6

File tree

1 file changed

+51
-16
lines changed

1 file changed

+51
-16
lines changed

examples/arm/executor_runner/arm_executor_runner.cpp

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,26 +23,39 @@
2323
#include "arm_perf_monitor.h"
2424

2525
#ifdef SEMIHOSTING
26-
// In our unit test flow, we have the capability to provide an entire model to
27-
// the Corstone-3xx FVP using semi hosting. Hence, the input allocation pool
28-
// needs to be large enough to take an entire model. On the FVP,
29-
// network_model_sec is linked to the DDR, which is large (256MB on
30-
// Corstone-300).
31-
const size_t input_allocation_pool_size = 100 * 1024 * 1024;
26+
27+
/**
28+
* The input_file_allocation_pool should be large enough to fit the various
29+
* input file data used when loading the data files when running semihosting
30+
* e.g. the input file data and the pte file data
31+
* In our unit test flow, we have the capability to provide an entire model to
32+
* the Corstone-3xx FVP using semi hosting. Hence, the input file allocation
33+
* pool needs to be large enough to take an entire model and input. On the FVP,
34+
* network_model_sec is linked to the DDR, which is large (256MB on
35+
* Corstone-300).
36+
* If you use semihosting on your HW this can be lowered to fit your
37+
* files/memory
38+
*/
39+
40+
const size_t input_file_allocation_pool_size = 60 * 1024 * 1024;
3241
unsigned char __attribute__((
3342
section("network_model_sec"),
34-
aligned(16))) input_allocation_pool[input_allocation_pool_size];
35-
// memory for the model will be allocated from the input_allocation_pool
43+
aligned(16))) input_file_allocation_pool[input_file_allocation_pool_size];
3644
char* model_pte = nullptr;
45+
3746
#else
47+
3848
/**
3949
* This header file is generated by the build process based on the .pte file
4050
* specified in the ET_PTE_FILE_PATH variable to the cmake build.
4151
* Control of the action of the .pte, its use of operators and delegates, and
4252
* which are included in the bare metal build are also orchestrated by the
4353
* CMakeLists file. For example use see examples/arm/run.sh
54+
*
55+
* e.g. This includes the pte as a big chunk of data struct into this file
4456
*/
4557
#include "model_pte.h"
58+
4659
#endif
4760

4861
using executorch::aten::ScalarType;
@@ -63,12 +76,34 @@ using executorch::runtime::Span;
6376
using executorch::runtime::Tag;
6477
using executorch::runtime::TensorInfo;
6578

66-
#define METHOD_ALLOCATOR_POOL_SIZE (70 * 1024 * 1024)
79+
/**
80+
* The method_allocation_pool should be large enough to fit the setup, input
81+
* used and other data used like the planned memory pool (e.g. memory-planned
82+
* buffers to use for mutable tensor data) In this example we run on a
83+
* Corstone-3xx FVP so we can use a lot of memory to be able to run and test
84+
* large models. If you run on HW this should be lowered to fit into your
85+
* available memory.
86+
*/
87+
#ifndef ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE
88+
#define ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE (60 * 1024 * 1024)
89+
#endif
90+
const size_t method_allocation_pool_size =
91+
ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE;
6792
unsigned char __attribute__((
6893
section("network_model_sec"),
69-
aligned(16))) method_allocation_pool[METHOD_ALLOCATOR_POOL_SIZE];
94+
aligned(16))) method_allocation_pool[method_allocation_pool_size];
7095

71-
const size_t temp_allocation_pool_size = 1 * 1024 * 1024;
96+
/**
97+
* The temp_allocation_pool is used for allocating temporary data during kernel
98+
* or delegate execution. This will be reset after each kernel or delegate call.
99+
* Currently a MemoryAllocator is used but a PlatformMemoryAllocator is probably
100+
* a better fit
101+
*/
102+
#ifndef ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE
103+
#define ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE (1 * 1024 * 1024)
104+
#endif
105+
const size_t temp_allocation_pool_size =
106+
ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE;
72107
unsigned char __attribute__((
73108
section("network_model_sec"),
74109
aligned(16))) temp_allocation_pool[temp_allocation_pool_size];
@@ -256,8 +291,8 @@ int main(int argc, const char* argv[]) {
256291

257292
#ifdef SEMIHOSTING
258293
const char* output_basename = nullptr;
259-
MemoryAllocator input_allocator(
260-
input_allocation_pool_size, input_allocation_pool);
294+
MemoryAllocator input_file_allocator(
295+
input_file_allocation_pool_size, input_file_allocation_pool);
261296

262297
/* parse input parameters */
263298
for (int i = 0; i < argc; i++) {
@@ -271,13 +306,13 @@ int main(int argc, const char* argv[]) {
271306
++nbr_inputs,
272307
input_tensor_filename);
273308
auto [buffer, buffer_size] =
274-
read_binary_file(input_tensor_filename, input_allocator);
309+
read_binary_file(input_tensor_filename, input_file_allocator);
275310
input_buffers.push_back(std::make_pair(buffer, buffer_size));
276311
} else if (std::strcmp(argv[i], "-m") == 0) {
277312
const char* pte_filename = argv[++i];
278313
ET_LOG(Info, "Reading pte model from file %s", pte_filename);
279314
auto [buffer, buffer_size] =
280-
read_binary_file(pte_filename, input_allocator);
315+
read_binary_file(pte_filename, input_file_allocator);
281316
// Store the model data with the same variable as if it was loaded
282317
// from compiled in location.
283318
model_pte = buffer;
@@ -320,7 +355,7 @@ int main(int argc, const char* argv[]) {
320355
}
321356

322357
MemoryAllocator method_allocator(
323-
METHOD_ALLOCATOR_POOL_SIZE, method_allocation_pool);
358+
method_allocation_pool_size, method_allocation_pool);
324359

325360
std::vector<uint8_t*> planned_buffers; // Owns the memory
326361
std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator

0 commit comments

Comments
 (0)