From 1f75e0f11a26645073a53df513681010de09a813 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Lindstr=C3=B6m?= Date: Wed, 25 Jun 2025 11:30:36 +0200 Subject: [PATCH] Arm backend: Split executor runner into init + run Refactor the main function of arm_executor_runner.cpp by extracting code that is related to initializing and running a model into two separate helper functions (runner_init and RunnerContext::run). A new struct called RunnerContext is introduced to store data/context required to run the model. Change-Id: Iaca06d6f12b84644ce76e23e8881787a7971cdaa --- .../executor_runner/arm_executor_runner.cpp | 422 +++++++++++------- 1 file changed, 265 insertions(+), 157 deletions(-) diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp index afd16d7cdf9..27bbeee5465 100644 --- a/examples/arm/executor_runner/arm_executor_runner.cpp +++ b/examples/arm/executor_runner/arm_executor_runner.cpp @@ -229,6 +229,65 @@ void et_pal_free(ET_UNUSED void* ptr) {} namespace { +/// Lightweight heapless container that constructs and stores a T in-place. +template <typename T> +class Box { + public: + Box() = default; + + ~Box() { + if (has_value) { + ptr()->~T(); + } + } + + Box(const Box&) = delete; + Box& operator=(const Box&) = delete; + + /// Destructs the already contained object if it's present and initializes a + /// new contained object while forwarding its constructor arguments. + template <typename... Args> + void reset(Args&&... args) { + if (has_value) { + // Destroy the already contained object. + reinterpret_cast<T*>(mem)->~T(); + } + // Init the new object. + new (mem) T(std::forward<Args>(args)...); + has_value = true; + } + + /// Returns a reference to the contained object. + T& value() { + return *ptr(); + } + + /// Returns a const reference to the contained object. 
+ const T& value() const { + return *ptr(); + } + + T* operator->() { + return ptr(); + } + + const T* operator->() const { + return ptr(); + } + + private: + alignas(T) uint8_t mem[sizeof(T)]; + bool has_value = false; + + T* ptr() { + return reinterpret_cast<T*>(mem); + } + + const T* ptr() const { + return reinterpret_cast<const T*>(mem); + } +}; + // Setup our own allocator that can show some extra stuff like used and free // memory info class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator { @@ -284,7 +343,7 @@ class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator { Result prepare_input_tensors( Method& method, MemoryAllocator& allocator, - std::vector>& input_buffers) { + const std::vector>& input_buffers) { MethodMeta method_meta = method.method_meta(); size_t num_inputs = method_meta.num_inputs(); size_t num_allocated = 0; @@ -347,7 +406,7 @@ Result prepare_input_tensors( tensor_meta.get().dim_order().data())); Tensor t(&impl); - // If input_buffers.size <= 0, we don't have any input, fill t with 1's. + // If input_buffers.size <= 0, we don't have any input, fill it with 1's. 
if (input_buffers.size() <= 0) { for (size_t j = 0; j < t.numel(); j++) { switch (t.scalar_type()) { @@ -415,99 +474,51 @@ std::pair read_binary_file( } #endif -} // namespace +/// Holds all state needed for setup and run phases +struct RunnerContext { + RunnerContext() = default; + RunnerContext(const RunnerContext& ctx) = delete; + RunnerContext& operator=(const RunnerContext& ctx) = delete; -int main(int argc, const char* argv[]) { -#if defined(SEMIHOSTING) - ET_LOG(Info, "Running executor with parameter:"); - if (argc < 7) { - ET_LOG(Fatal, "Not right number of parameters!"); - ET_LOG( - Fatal, - "app -m model.pte -i input.bin [-i input2.bin] -o output_basename"); - ET_LOG(Fatal, "Exiting!"); - _exit(1); - } - ET_LOG(Info, " %s", argv[0]); - for (int i = 1; i < argc; i++) { - ET_LOG(Info, " %s %s", argv[i], argv[++i]); - } -#else - (void)argc; - (void)argv; + const char* method_name = nullptr; + size_t planned_buffer_memsize = 0; + size_t method_loaded_memsize = 0; + size_t executor_membase = 0; + size_t program_data_len = 0; + size_t input_memsize = 0; + size_t pte_size = 0; + bool bundle_io = false; + Box> prepared_inputs; + Box method_allocator; + Box temp_allocator; + Box> method; +#if defined(ET_EVENT_TRACER_ENABLED) + Box etdump_gen; #endif - executorch::runtime::runtime_init(); - std::vector> input_buffers; - size_t pte_size = sizeof(model_pte); - -#if defined(SEMIHOSTING) - const char* output_basename = nullptr; - ArmMemoryAllocator input_file_allocator( - input_file_allocation_pool_size, input_file_allocation_pool); - - /* parse input parameters */ - for (int i = 0; i < argc; i++) { - size_t nbr_inputs = 0; - if (std::strcmp(argv[i], "-i") == 0) { - // input file, read the data into memory - const char* input_tensor_filename = argv[++i]; - ET_LOG( - Info, - "Reading input tensor %d from file %s", - ++nbr_inputs, - input_tensor_filename); - auto [buffer, buffer_size] = - read_binary_file(input_tensor_filename, input_file_allocator); - if (buffer == 
nullptr) { - ET_LOG( - Error, - "Reading input tensor %d from file %s ERROR Out of memory", - nbr_inputs, - input_tensor_filename); - _exit(1); - } - input_buffers.push_back(std::make_pair(buffer, buffer_size)); - } else if (std::strcmp(argv[i], "-m") == 0) { - const char* pte_filename = argv[++i]; - ET_LOG(Info, "Reading pte model from file %s", pte_filename); - auto [buffer, buffer_size] = - read_binary_file(pte_filename, input_file_allocator); - if (buffer == nullptr) { - ET_LOG( - Error, - "Reading pte model from file %s ERROR Out of memory", - pte_filename); - _exit(1); - } - - // Store the model data with the same variable as if it was loaded - // from compiled in location. - model_pte = buffer; - pte_size = buffer_size; - } else if (std::strcmp(argv[i], "-o") == 0) { - // store the base filename to write output to. - output_basename = argv[++i]; - } - } -#endif - ET_LOG( - Info, "PTE in %p %c Size: %lu bytes", model_pte, model_pte[0], pte_size); + /// Runs the loaded method and returns the status + Error run(); +}; +void runner_init( + RunnerContext& ctx, + std::vector> input_buffers, + size_t pte_size) { // Find the offset to the embedded Program. 
const void* program_data = model_pte; - size_t program_data_len = pte_size; + ctx.program_data_len = pte_size; + ctx.pte_size = pte_size; #if defined(ET_BUNDLE_IO) - bool bundle_io = executorch::bundled_program::is_bundled_program( - reinterpret_cast(model_pte), pte_size); - if (bundle_io) { + ctx.bundle_io = executorch::bundled_program::is_bundled_program( + reinterpret_cast(model_pte), ctx.pte_size); + if (ctx.bundle_io) { // BundleIO bpte is provided, dig out the actual model from the data area Error status = executorch::bundled_program::get_program_data( reinterpret_cast(model_pte), - pte_size, + ctx.pte_size, &program_data, - &program_data_len); + &ctx.program_data_len); ET_CHECK_MSG( status == Error::Ok, @@ -515,8 +526,8 @@ int main(int argc, const char* argv[]) { (unsigned int)status); } #endif - auto loader = BufferDataLoader(program_data, program_data_len); - ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", program_data_len); + auto loader = BufferDataLoader(program_data, ctx.program_data_len); + ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", ctx.program_data_len); // Parse the program file. This is immutable, and can also be reused // between multiple execution invocations across multiple threads. 
@@ -531,20 +542,19 @@ int main(int argc, const char* argv[]) { ET_LOG(Info, "Model buffer loaded, has %lu methods", program->num_methods()); - const char* method_name = nullptr; { const auto method_name_result = program->get_method_name(0); ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); - method_name = *method_name_result; + ctx.method_name = *method_name_result; } - ET_LOG(Info, "Running method %s", method_name); + ET_LOG(Info, "Running method %s", ctx.method_name); - Result method_meta = program->method_meta(method_name); + Result method_meta = program->method_meta(ctx.method_name); if (!method_meta.ok()) { ET_LOG( Info, "Failed to get method_meta for %s: 0x%x", - method_name, + ctx.method_name, (unsigned int)method_meta.error()); } @@ -553,14 +563,14 @@ int main(int argc, const char* argv[]) { "Setup Method allocator pool. Size: %lu bytes.", method_allocation_pool_size); - ArmMemoryAllocator method_allocator( + ctx.method_allocator.reset( method_allocation_pool_size, method_allocation_pool); std::vector planned_buffers; // Owns the memory std::vector> planned_spans; // Passed to the allocator size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers(); - size_t planned_buffer_membase = method_allocator.used_size(); + size_t planned_buffer_membase = ctx.method_allocator->used_size(); for (size_t id = 0; id < num_memory_planned_buffers; ++id) { size_t buffer_size = @@ -569,7 +579,7 @@ int main(int argc, const char* argv[]) { /* Move to it's own allocator when MemoryPlanner is in place. 
*/ uint8_t* buffer = - reinterpret_cast(method_allocator.allocate(buffer_size)); + reinterpret_cast(ctx.method_allocator->allocate(buffer_size)); ET_CHECK_MSG( buffer != nullptr, "Could not allocate memory for memory planned buffer size %zu", @@ -578,52 +588,53 @@ int main(int argc, const char* argv[]) { planned_spans.push_back({planned_buffers.back(), buffer_size}); } - size_t planned_buffer_memsize = - method_allocator.used_size() - planned_buffer_membase; + ctx.planned_buffer_memsize = + ctx.method_allocator->used_size() - planned_buffer_membase; HierarchicalAllocator planned_memory( {planned_spans.data(), planned_spans.size()}); - ArmMemoryAllocator temp_allocator( - temp_allocation_pool_size, temp_allocation_pool); + ctx.temp_allocator.reset(temp_allocation_pool_size, temp_allocation_pool); MemoryManager memory_manager( - &method_allocator, &planned_memory, &temp_allocator); + &ctx.method_allocator.value(), + &planned_memory, + &ctx.temp_allocator.value()); - size_t method_loaded_membase = method_allocator.used_size(); + size_t method_loaded_membase = ctx.method_allocator->used_size(); executorch::runtime::EventTracer* event_tracer_ptr = nullptr; #if defined(ET_EVENT_TRACER_ENABLED) ET_LOG(Info, "Setting up ETDump"); - torch::executor::ETDumpGen etdump_gen = torch::executor::ETDumpGen(); - event_tracer_ptr = &etdump_gen; + ctx.etdump_gen.reset(); + event_tracer_ptr = &ctx.etdump_gen.value(); #endif - Result method = - program->load_method(method_name, &memory_manager, event_tracer_ptr); + ctx.method.reset( + program->load_method(ctx.method_name, &memory_manager, event_tracer_ptr)); - if (!method.ok()) { + if (!ctx.method->ok()) { ET_LOG( Info, "Loading of method %s failed with status 0x%" PRIx32, - method_name, - method.error()); + ctx.method_name, + ctx.method->error()); } - size_t method_loaded_memsize = - method_allocator.used_size() - method_loaded_membase; - ET_LOG(Info, "Method '%s' loaded.", method_name); + ctx.method_loaded_memsize = + 
ctx.method_allocator->used_size() - method_loaded_membase; + ET_LOG(Info, "Method '%s' loaded.", ctx.method_name); ET_LOG(Info, "Preparing inputs..."); - size_t input_membase = method_allocator.used_size(); + size_t input_membase = ctx.method_allocator->used_size(); #if defined(ET_BUNDLE_IO) - if (bundle_io) { + if (ctx.bundle_io) { // Get inputs from bundled IO ".bpte" data // Useful for testing ET_LOG(Info, "Input testset[%d] from bundled bpte", testset_idx); Error status = executorch::bundled_program::load_bundled_input( - *method, model_pte, testset_idx); + *ctx.method.value(), model_pte, testset_idx); ET_CHECK_MSG( status == Error::Ok, "load_bundled_input failed with status 0x%" PRIx32, @@ -635,22 +646,22 @@ int main(int argc, const char* argv[]) { // Get inputs from SEMIHOSTING or fake it with a lot of "1" // Use "static" to force to compiler to remove this when it goes out of // scope - static auto prepared_inputs = - ::prepare_input_tensors(*method, method_allocator, input_buffers); + ctx.prepared_inputs.reset(::prepare_input_tensors( + *ctx.method.value(), ctx.method_allocator.value(), input_buffers)); - if (!prepared_inputs.ok()) { + if (!ctx.prepared_inputs->ok()) { ET_LOG( Info, "Preparing inputs tensors for method %s failed with status 0x%" PRIx32, - method_name, - prepared_inputs.error()); + ctx.method_name, + ctx.prepared_inputs->error()); } } #if defined(ET_DUMP_INPUT) { - std::vector inputs(method->inputs_size()); + std::vector inputs((*ctx.method.value())->inputs_size()); ET_LOG(Info, "%zu inputs: ", inputs.size()); - Error status = method->get_inputs(inputs.data(), inputs.size()); + Error status = ctx.method.value()->get_inputs(inputs.data(), inputs.size()); ET_CHECK(status == Error::Ok); for (int i = 0; i < inputs.size(); ++i) { @@ -692,19 +703,121 @@ int main(int argc, const char* argv[]) { } } #endif - size_t input_memsize = method_allocator.used_size() - input_membase; + ctx.input_memsize = ctx.method_allocator->used_size() - input_membase; + 
ctx.executor_membase = ctx.method_allocator->used_size(); + ET_LOG(Info, "Input prepared."); +} +Error RunnerContext::run() { ET_LOG(Info, "Starting the model execution..."); - size_t executor_membase = method_allocator.used_size(); + StartMeasurements(); // Run the model. - Error status = method->execute(); + Error status = method.value()->execute(); StopMeasurements(); - size_t executor_memsize = method_allocator.used_size() - executor_membase; - ET_LOG(Info, "model_pte_program_size: %lu bytes.", program_data_len); - ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", pte_size); + return status; +} + +} // namespace + +int main(int argc, const char* argv[]) { +#if defined(SEMIHOSTING) + ET_LOG(Info, "Running executor with parameter:"); + if (argc < 7) { + ET_LOG(Fatal, "Not right number of parameters!"); + ET_LOG( + Fatal, + "app -m model.pte -i input.bin [-i input2.bin] -o output_basename"); + ET_LOG(Fatal, "Exiting!"); + _exit(1); + } + ET_LOG(Info, " %s", argv[0]); + for (int i = 1; i < argc; i++) { + ET_LOG(Info, " %s %s", argv[i], argv[++i]); + } +#else + (void)argc; + (void)argv; +#endif + + executorch::runtime::runtime_init(); + std::vector> input_buffers; + size_t pte_size = sizeof(model_pte); + +#if defined(SEMIHOSTING) + const char* output_basename = nullptr; + ArmMemoryAllocator input_file_allocator( + input_file_allocation_pool_size, input_file_allocation_pool); + + /* parse input parameters */ + for (int i = 0; i < argc; i++) { + size_t nbr_inputs = 0; + if (std::strcmp(argv[i], "-i") == 0) { + // input file, read the data into memory + const char* input_tensor_filename = argv[++i]; + ET_LOG( + Info, + "Reading input tensor %d from file %s", + ++nbr_inputs, + input_tensor_filename); + auto [buffer, buffer_size] = + read_binary_file(input_tensor_filename, input_file_allocator); + if (buffer == nullptr) { + ET_LOG( + Error, + "Reading input tensor %d from file %s ERROR Out of memory", + nbr_inputs, + input_tensor_filename); + _exit(1); + } + 
input_buffers.push_back(std::make_pair(buffer, buffer_size)); + } else if (std::strcmp(argv[i], "-m") == 0) { + const char* pte_filename = argv[++i]; + ET_LOG(Info, "Reading pte model from file %s", pte_filename); + auto [buffer, buffer_size] = + read_binary_file(pte_filename, input_file_allocator); + if (buffer == nullptr) { + ET_LOG( + Error, + "Reading pte model from file %s ERROR Out of memory", + pte_filename); + _exit(1); + } + + // Store the model data with the same variable as if it was loaded + // from compiled in location. + model_pte = buffer; + pte_size = buffer_size; + } else if (std::strcmp(argv[i], "-o") == 0) { + // store the base filename to write output to. + output_basename = argv[++i]; + } + } +#endif + ET_LOG( + Info, "PTE in %p %c Size: %lu bytes", model_pte, model_pte[0], pte_size); + + RunnerContext ctx; + runner_init(ctx, input_buffers, pte_size); + + Error status = ctx.run(); + if (status != Error::Ok) { + ET_LOG( + Info, + "Execution of method %s failed with status 0x%" PRIx32, + ctx.method_name, + status); + } else { + ET_LOG(Info, "Model executed successfully."); + } + + size_t executor_memsize = + ctx.method_allocator->used_size() - ctx.executor_membase; + + ET_LOG(Info, "model_pte_program_size: %lu bytes.", ctx.program_data_len); + ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", ctx.pte_size); #if defined(SEMIHOSTING) if (input_file_allocator.size() > 0) { ET_LOG( @@ -716,44 +829,39 @@ int main(int argc, const char* argv[]) { 100 * input_file_allocator.used_size() / input_file_allocator.size()); } #endif - if (method_allocator.size() != 0) { - size_t method_allocator_used = method_allocator.used_size(); + if (ctx.method_allocator->size() != 0) { + size_t method_allocator_used = ctx.method_allocator->used_size(); ET_LOG( Info, "method_allocator_used: %zu / %zu free: %zu ( used: %zu %% ) ", method_allocator_used, - method_allocator.size(), - method_allocator.free_size(), - 100 * method_allocator_used / method_allocator.size()); + 
ctx.method_allocator->size(), + ctx.method_allocator->free_size(), + 100 * method_allocator_used / ctx.method_allocator->size()); ET_LOG( - Info, "method_allocator_planned: %zu bytes", planned_buffer_memsize); - ET_LOG(Info, "method_allocator_loaded: %zu bytes", method_loaded_memsize); - ET_LOG(Info, "method_allocator_input: %zu bytes", input_memsize); - ET_LOG(Info, "method_allocator_executor: %zu bytes", executor_memsize); - } - if (temp_allocator.size() > 0) { + Info, + "method_allocator_planned: %zu bytes", + ctx.planned_buffer_memsize); ET_LOG( Info, - "peak_temp_allocator: %zu / %zu free: %zu ( used: %zu %% ) ", - temp_allocator.peak_used(), - temp_allocator.size(), - temp_allocator.free_size(), - 100 * temp_allocator.peak_used() / temp_allocator.size()); + "method_allocator_loaded: %zu bytes", + ctx.method_loaded_memsize); + ET_LOG(Info, "method_allocator_input: %zu bytes", ctx.input_memsize); + ET_LOG(Info, "method_allocator_executor: %zu bytes", executor_memsize); } - - if (status != Error::Ok) { + if (ctx.temp_allocator->size() > 0) { ET_LOG( Info, - "Execution of method %s failed with status 0x%" PRIx32, - method_name, - status); - } else { - ET_LOG(Info, "Model executed successfully."); + "peak_temp_allocator: %zu / %zu free: %zu ( used: %zu %% ) ", + ctx.temp_allocator->peak_used(), + ctx.temp_allocator->size(), + ctx.temp_allocator->free_size(), + 100 * ctx.temp_allocator->peak_used() / ctx.temp_allocator->size()); } - std::vector outputs(method->outputs_size()); + std::vector outputs(ctx.method.value()->outputs_size()); ET_LOG(Info, "%zu outputs: ", outputs.size()); - status = method->get_outputs(outputs.data(), outputs.size()); + status = ctx.method.value()->get_outputs(outputs.data(), outputs.size()); ET_CHECK(status == Error::Ok); // Print the outputs. 
@@ -811,15 +919,15 @@ int main(int argc, const char* argv[]) { #if !defined(SEMIHOSTING) // Dump the etdump data containing profiling/debugging data to the serial line // base64 encoded - ETDumpResult result = etdump_gen.get_etdump_data(); + ETDumpResult result = ctx.etdump_gen->get_etdump_data(); if (result.buf != nullptr && result.size > 0) { // On a device with no file system we can't just write it out // to the file-system so we base64 encode it and dump it on the log. int mode = 0; size_t len = result.size; size_t encoded_len = base64_encoded_size(result.size, mode); - uint8_t* encoded_buf = - reinterpret_cast(method_allocator.allocate(encoded_len + 1)); + uint8_t* encoded_buf = reinterpret_cast( + ctx.method_allocator->allocate(encoded_len + 1)); if (encoded_buf != nullptr) { int ret = base64_encode( encoded_buf, (uint8_t*)result.buf, &encoded_len, &len, mode); @@ -853,10 +961,10 @@ int main(int argc, const char* argv[]) { #endif #if defined(ET_BUNDLE_IO) - if (bundle_io) { + if (ctx.bundle_io) { // Check result - ErrorStats stats = - compute_method_output_error_stats(*method, model_pte, testset_idx); + ErrorStats stats = compute_method_output_error_stats( + *ctx.method.value(), model_pte, testset_idx); if (stats.status == Error::Ok) { ET_LOG(Info, "=== Error stats for testset %d ===", testset_idx); ET_LOG(Info, " mean_absolute_error: %f", stats.mean_abs_error); @@ -873,7 +981,7 @@ int main(int argc, const char* argv[]) { // Verify the result. status = verify_method_outputs( - *method, model_pte, testset_idx, et_rtol, et_atol); + *ctx.method.value(), model_pte, testset_idx, et_rtol, et_atol); if (status == Error::Ok) { ET_LOG(Info, "Model output match expected BundleIO bpte ref data."); ET_LOG(Info, "TEST: BundleIO index[%d] Test_result: PASS", testset_idx);