diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp index e0d35fb82a8..cacf5924b46 100644 --- a/examples/arm/executor_runner/arm_executor_runner.cpp +++ b/examples/arm/executor_runner/arm_executor_runner.cpp @@ -224,65 +224,6 @@ void et_pal_free(ET_UNUSED void* ptr) {} namespace { -/// Lightweight heapless container that constructs and stores a T in-place. -template -class Box { - public: - Box() = default; - - ~Box() { - if (has_value) { - ptr()->~T(); - } - } - - Box(const Box&) = delete; - Box& operator=(const Box&) = delete; - - /// Destructs the already contained object if it's present and initialize a - /// new contained object while forwarding its constructor arguments. - template - void reset(Args&&... args) { - if (has_value) { - // Destroy the already contained object. - reinterpret_cast(mem)->~T(); - } - // Init the new object. - new (mem) T(std::forward(args)...); - has_value = true; - } - - /// Returns a reference to the contained object. - T& value() { - return *ptr(); - } - - /// Returns a const reference to the contained object. - const T& value() const { - return *ptr(); - } - - T* operator->() { - return ptr(); - } - - const T* operator->() const { - return ptr(); - } - - private: - alignas(T) uint8_t mem[sizeof(T)]; - bool has_value = false; - - T* ptr() { - return reinterpret_cast(mem); - } - - const T* ptr() const { - return reinterpret_cast(mem); - } -}; - // Setup our own allocator that can show some extra stuff like used and free // memory info class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator { @@ -338,7 +279,7 @@ class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator { Result prepare_input_tensors( Method& method, MemoryAllocator& allocator, - const std::vector>& input_buffers) { + std::vector>& input_buffers) { MethodMeta method_meta = method.method_meta(); size_t num_inputs = method_meta.num_inputs(); size_t num_allocated = 0; @@ -401,7 +342,7 @@ Result prepare_input_tensors( tensor_meta.get().dim_order().data())); Tensor t(&impl); - // If input_buffers.size <= 0, we don't have any input, fill it with 1's. + // If input_buffers.size <= 0, we don't have any input, fill t with 1's. if (input_buffers.size() <= 0) { for (size_t j = 0; j < t.numel(); j++) { switch (t.scalar_type()) { @@ -469,51 +410,99 @@ std::pair read_binary_file( } #endif -/// Holds all state needed for setup and run phases -struct RunnerContext { - RunnerContext() = default; - RunnerContext(const RunnerContext& ctx) = delete; - RunnerContext& operator=(const RunnerContext& ctx) = delete; +} // namespace - const char* method_name = nullptr; - size_t planned_buffer_memsize = 0; - size_t method_loaded_memsize = 0; - size_t executor_membase = 0; - size_t program_data_len = 0; - size_t input_memsize = 0; - size_t pte_size = 0; - bool bundle_io = false; - Box> prepared_inputs; - Box method_allocator; - Box temp_allocator; - Box> method; -#if defined(ET_EVENT_TRACER_ENABLED) - Box etdump_gen; +int main(int argc, const char* argv[]) { +#if defined(SEMIHOSTING) + ET_LOG(Info, "Running executor with parameter:"); + if (argc < 7) { + ET_LOG(Fatal, "Not right number of parameters!"); + ET_LOG( + Fatal, + "app -m model.pte -i input.bin [-i input2.bin] -o output_basename"); + ET_LOG(Fatal, "Exiting!"); + _exit(1); + } + ET_LOG(Info, " %s", argv[0]); + for (int i = 1; i < argc; i++) { + ET_LOG(Info, " %s %s", argv[i], argv[++i]); + } +#else + (void)argc; + (void)argv; #endif - /// Runs the loaded method and returns the status - Error run(); -}; + executorch::runtime::runtime_init(); + std::vector> input_buffers; + size_t pte_size = sizeof(model_pte); + +#if defined(SEMIHOSTING) + const char* output_basename = nullptr; + ArmMemoryAllocator input_file_allocator( + input_file_allocation_pool_size, input_file_allocation_pool); + + /* parse input parameters */ + for (int i = 0; i < argc; i++) { + size_t nbr_inputs = 0; + if (std::strcmp(argv[i], "-i") == 0) { + // input file, read the data into memory + const char* input_tensor_filename = argv[++i]; + ET_LOG( + Info, + "Reading input tensor %d from file %s", + ++nbr_inputs, + input_tensor_filename); + auto [buffer, buffer_size] = + read_binary_file(input_tensor_filename, input_file_allocator); + if (buffer == nullptr) { + ET_LOG( + Error, + "Reading input tensor %d from file %s ERROR Out of memory", + nbr_inputs, + input_tensor_filename); + _exit(1); + } + input_buffers.push_back(std::make_pair(buffer, buffer_size)); + } else if (std::strcmp(argv[i], "-m") == 0) { + const char* pte_filename = argv[++i]; + ET_LOG(Info, "Reading pte model from file %s", pte_filename); + auto [buffer, buffer_size] = + read_binary_file(pte_filename, input_file_allocator); + if (buffer == nullptr) { + ET_LOG( + Error, + "Reading pte model from file %s ERROR Out of memory", + pte_filename); + _exit(1); + } + + // Store the model data with the same variable as if it was loaded + // from compiled in location. + model_pte = buffer; + pte_size = buffer_size; + } else if (std::strcmp(argv[i], "-o") == 0) { + // store the base filename to write output to. + output_basename = argv[++i]; + } + } +#endif + ET_LOG( + Info, "PTE in %p %c Size: %lu bytes", model_pte, model_pte[0], pte_size); -void runner_init( - RunnerContext& ctx, - std::vector> input_buffers, - size_t pte_size) { // Find the offset to the embedded Program. const void* program_data = model_pte; - ctx.program_data_len = pte_size; - ctx.pte_size = pte_size; + size_t program_data_len = pte_size; #if defined(ET_BUNDLE_IO) - ctx.bundle_io = executorch::bundled_program::is_bundled_program( - reinterpret_cast(model_pte), ctx.pte_size); - if (ctx.bundle_io) { + bool bundle_io = executorch::bundled_program::is_bundled_program( + reinterpret_cast(model_pte), pte_size); + if (bundle_io) { // BundleIO bpte is provided, dig out the actual model from the data area Error status = executorch::bundled_program::get_program_data( reinterpret_cast(model_pte), - ctx.pte_size, + pte_size, &program_data, - &ctx.program_data_len); + &program_data_len); ET_CHECK_MSG( status == Error::Ok, @@ -521,8 +510,8 @@ void runner_init( (unsigned int)status); } #endif - auto loader = BufferDataLoader(program_data, ctx.program_data_len); - ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", ctx.program_data_len); + auto loader = BufferDataLoader(program_data, program_data_len); + ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", program_data_len); // Parse the program file. This is immutable, and can also be reused // between multiple execution invocations across multiple threads. @@ -537,19 +526,20 @@ void runner_init( ET_LOG(Info, "Model buffer loaded, has %lu methods", program->num_methods()); + const char* method_name = nullptr; { const auto method_name_result = program->get_method_name(0); ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); - ctx.method_name = *method_name_result; + method_name = *method_name_result; } - ET_LOG(Info, "Running method %s", ctx.method_name); + ET_LOG(Info, "Running method %s", method_name); - Result method_meta = program->method_meta(ctx.method_name); + Result method_meta = program->method_meta(method_name); if (!method_meta.ok()) { ET_LOG( Info, "Failed to get method_meta for %s: 0x%x", - ctx.method_name, + method_name, (unsigned int)method_meta.error()); } @@ -558,14 +548,14 @@ void runner_init( "Setup Method allocator pool. Size: %lu bytes.", method_allocation_pool_size); - ctx.method_allocator.reset( + ArmMemoryAllocator method_allocator( method_allocation_pool_size, method_allocation_pool); std::vector planned_buffers; // Owns the memory std::vector> planned_spans; // Passed to the allocator size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers(); - size_t planned_buffer_membase = ctx.method_allocator->used_size(); + size_t planned_buffer_membase = method_allocator.used_size(); for (size_t id = 0; id < num_memory_planned_buffers; ++id) { size_t buffer_size = @@ -574,7 +564,7 @@ void runner_init( /* Move to it's own allocator when MemoryPlanner is in place. */ uint8_t* buffer = - reinterpret_cast(ctx.method_allocator->allocate(buffer_size)); + reinterpret_cast(method_allocator.allocate(buffer_size)); ET_CHECK_MSG( buffer != nullptr, "Could not allocate memory for memory planned buffer size %zu", @@ -583,53 +573,52 @@ void runner_init( planned_spans.push_back({planned_buffers.back(), buffer_size}); } - ctx.planned_buffer_memsize = - ctx.method_allocator->used_size() - planned_buffer_membase; + size_t planned_buffer_memsize = + method_allocator.used_size() - planned_buffer_membase; HierarchicalAllocator planned_memory( {planned_spans.data(), planned_spans.size()}); - ctx.temp_allocator.reset(temp_allocation_pool_size, temp_allocation_pool); + ArmMemoryAllocator temp_allocator( + temp_allocation_pool_size, temp_allocation_pool); MemoryManager memory_manager( - &ctx.method_allocator.value(), - &planned_memory, - &ctx.temp_allocator.value()); + &method_allocator, &planned_memory, &temp_allocator); - size_t method_loaded_membase = ctx.method_allocator->used_size(); + size_t method_loaded_membase = method_allocator.used_size(); executorch::runtime::EventTracer* event_tracer_ptr = nullptr; #if defined(ET_EVENT_TRACER_ENABLED) ET_LOG(Info, "Setting up ETDump"); - ctx.etdump_gen.reset(); - event_tracer_ptr = &ctx.etdump_gen.value(); + torch::executor::ETDumpGen etdump_gen = torch::executor::ETDumpGen(); + event_tracer_ptr = &etdump_gen; #endif - ctx.method.reset( - program->load_method(ctx.method_name, &memory_manager, event_tracer_ptr)); + Result method = + program->load_method(method_name, &memory_manager, event_tracer_ptr); - if (!ctx.method->ok()) { + if (!method.ok()) { ET_LOG( Info, "Loading of method %s failed with status 0x%" PRIx32, - ctx.method_name, - ctx.method->error()); + method_name, + method.error()); } - ctx.method_loaded_memsize = - ctx.method_allocator->used_size() - method_loaded_membase; - ET_LOG(Info, "Method '%s' loaded.", ctx.method_name); + size_t method_loaded_memsize = + method_allocator.used_size() - method_loaded_membase; + ET_LOG(Info, "Method '%s' loaded.", method_name); ET_LOG(Info, "Preparing inputs..."); - size_t input_membase = ctx.method_allocator->used_size(); + size_t input_membase = method_allocator.used_size(); #if defined(ET_BUNDLE_IO) - if (ctx.bundle_io) { + if (bundle_io) { // Get inputs from bundled IO ".bpte" data // Useful for testing ET_LOG(Info, "Input testset[%d] from bundled bpte", testset_idx); Error status = executorch::bundled_program::load_bundled_input( - *ctx.method.value(), model_pte, testset_idx); + *method, model_pte, testset_idx); ET_CHECK_MSG( status == Error::Ok, "load_bundled_input failed with status 0x%" PRIx32, @@ -641,22 +630,22 @@ void runner_init( // Get inputs from SEMIHOSTING or fake it with a lot of "1" // Use "static" to force to compiler to remove this when it goes out of // scope - ctx.prepared_inputs.reset(::prepare_input_tensors( - *ctx.method.value(), ctx.method_allocator.value(), input_buffers)); + static auto prepared_inputs = + ::prepare_input_tensors(*method, method_allocator, input_buffers); - if (!ctx.prepared_inputs->ok()) { + if (!prepared_inputs.ok()) { ET_LOG( Info, "Preparing inputs tensors for method %s failed with status 0x%" PRIx32, - ctx.method_name, - ctx.prepared_inputs->error()); + method_name, + prepared_inputs.error()); } } #if defined(ET_DUMP_INPUT) { - std::vector inputs((*ctx.method.value())->inputs_size()); + std::vector inputs(method->inputs_size()); ET_LOG(Info, "%zu inputs: ", inputs.size()); - Error status = ctx.method.value()->get_inputs(inputs.data(), inputs.size()); + Error status = method->get_inputs(inputs.data(), inputs.size()); ET_CHECK(status == Error::Ok); for (int i = 0; i < inputs.size(); ++i) { @@ -698,121 +687,19 @@ void runner_init( } } #endif - ctx.input_memsize = ctx.method_allocator->used_size() - input_membase; - ctx.executor_membase = ctx.method_allocator->used_size(); - + size_t input_memsize = method_allocator.used_size() - input_membase; ET_LOG(Info, "Input prepared."); -} -Error RunnerContext::run() { ET_LOG(Info, "Starting the model execution..."); - + size_t executor_membase = method_allocator.used_size(); StartMeasurements(); // Run the model. - Error status = method.value()->execute(); + Error status = method->execute(); StopMeasurements(); + size_t executor_memsize = method_allocator.used_size() - executor_membase; - return status; -} - -} // namespace - -int main(int argc, const char* argv[]) { -#if defined(SEMIHOSTING) - ET_LOG(Info, "Running executor with parameter:"); - if (argc < 7) { - ET_LOG(Fatal, "Not right number of parameters!"); - ET_LOG( - Fatal, - "app -m model.pte -i input.bin [-i input2.bin] -o output_basename"); - ET_LOG(Fatal, "Exiting!"); - _exit(1); - } - ET_LOG(Info, " %s", argv[0]); - for (int i = 1; i < argc; i++) { - ET_LOG(Info, " %s %s", argv[i], argv[++i]); - } -#else - (void)argc; - (void)argv; -#endif - - executorch::runtime::runtime_init(); - std::vector> input_buffers; - size_t pte_size = sizeof(model_pte); - -#if defined(SEMIHOSTING) - const char* output_basename = nullptr; - ArmMemoryAllocator input_file_allocator( - input_file_allocation_pool_size, input_file_allocation_pool); - - /* parse input parameters */ - for (int i = 0; i < argc; i++) { - size_t nbr_inputs = 0; - if (std::strcmp(argv[i], "-i") == 0) { - // input file, read the data into memory - const char* input_tensor_filename = argv[++i]; - ET_LOG( - Info, - "Reading input tensor %d from file %s", - ++nbr_inputs, - input_tensor_filename); - auto [buffer, buffer_size] = - read_binary_file(input_tensor_filename, input_file_allocator); - if (buffer == nullptr) { - ET_LOG( - Error, - "Reading input tensor %d from file %s ERROR Out of memory", - nbr_inputs, - input_tensor_filename); - _exit(1); - } - input_buffers.push_back(std::make_pair(buffer, buffer_size)); - } else if (std::strcmp(argv[i], "-m") == 0) { - const char* pte_filename = argv[++i]; - ET_LOG(Info, "Reading pte model from file %s", pte_filename); - auto [buffer, buffer_size] = - read_binary_file(pte_filename, input_file_allocator); - if (buffer == nullptr) { - ET_LOG( - Error, - "Reading pte model from file %s ERROR Out of memory", - pte_filename); - _exit(1); - } - - // Store the model data with the same variable as if it was loaded - // from compiled in location. - model_pte = buffer; - pte_size = buffer_size; - } else if (std::strcmp(argv[i], "-o") == 0) { - // store the base filename to write output to. - output_basename = argv[++i]; - } - } -#endif - ET_LOG( - Info, "PTE in %p %c Size: %lu bytes", model_pte, model_pte[0], pte_size); - - RunnerContext ctx; - runner_init(ctx, input_buffers, pte_size); - - Error status = ctx.run(); - if (status != Error::Ok) { - ET_LOG( - Info, - "Execution of method %s failed with status 0x%" PRIx32, - ctx.method_name, - status); - } else { - ET_LOG(Info, "Model executed successfully."); - } - - size_t executor_memsize = - ctx.method_allocator->used_size() - ctx.executor_membase; - - ET_LOG(Info, "model_pte_program_size: %lu bytes.", ctx.program_data_len); - ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", ctx.pte_size); + ET_LOG(Info, "model_pte_program_size: %lu bytes.", program_data_len); + ET_LOG(Info, "model_pte_loaded_size: %lu bytes.", pte_size); #if defined(SEMIHOSTING) if (input_file_allocator.size() > 0) { ET_LOG( @@ -824,39 +711,44 @@ int main(int argc, const char* argv[]) { 100 * input_file_allocator.used_size() / input_file_allocator.size()); } #endif - if (ctx.method_allocator->size() != 0) { - size_t method_allocator_used = ctx.method_allocator->used_size(); + if (method_allocator.size() != 0) { + size_t method_allocator_used = method_allocator.used_size(); ET_LOG( Info, "method_allocator_used: %zu / %zu free: %zu ( used: %zu %% ) ", method_allocator_used, - ctx.method_allocator->size(), - ctx.method_allocator->free_size(), - 100 * method_allocator_used / ctx.method_allocator->size()); - ET_LOG( - Info, - "method_allocator_planned: %zu bytes", - ctx.planned_buffer_memsize); + method_allocator.size(), + method_allocator.free_size(), + 100 * method_allocator_used / method_allocator.size()); ET_LOG( - Info, - "method_allocator_loaded: %zu bytes", - ctx.method_loaded_memsize); - ET_LOG(Info, "method_allocator_input: %zu bytes", ctx.input_memsize); + Info, "method_allocator_planned: %zu bytes", planned_buffer_memsize); + ET_LOG(Info, "method_allocator_loaded: %zu bytes", method_loaded_memsize); + ET_LOG(Info, "method_allocator_input: %zu bytes", input_memsize); ET_LOG(Info, "method_allocator_executor: %zu bytes", executor_memsize); } - if (ctx.temp_allocator->size() > 0) { + if (temp_allocator.size() > 0) { ET_LOG( Info, "peak_temp_allocator: %zu / %zu free: %zu ( used: %zu %% ) ", - ctx.temp_allocator->peak_used(), - ctx.temp_allocator->size(), - ctx.temp_allocator->free_size(), - 100 * ctx.temp_allocator->peak_used() / ctx.temp_allocator->size()); + temp_allocator.peak_used(), + temp_allocator.size(), + temp_allocator.free_size(), + 100 * temp_allocator.peak_used() / temp_allocator.size()); + } + + if (status != Error::Ok) { + ET_LOG( + Info, + "Execution of method %s failed with status 0x%" PRIx32, + method_name, + status); + } else { + ET_LOG(Info, "Model executed successfully."); } - std::vector outputs(ctx.method.value()->outputs_size()); + std::vector outputs(method->outputs_size()); ET_LOG(Info, "%zu outputs: ", outputs.size()); - status = ctx.method.value()->get_outputs(outputs.data(), outputs.size()); + status = method->get_outputs(outputs.data(), outputs.size()); ET_CHECK(status == Error::Ok); // Print the outputs. @@ -914,15 +806,15 @@ int main(int argc, const char* argv[]) { #if !defined(SEMIHOSTING) // Dump the etdump data containing profiling/debugging data to the serial line // base64 encoded - ETDumpResult result = ctx.etdump_gen->get_etdump_data(); + ETDumpResult result = etdump_gen.get_etdump_data(); if (result.buf != nullptr && result.size > 0) { // On a device with no file system we can't just write it out // to the file-system so we base64 encode it and dump it on the log. int mode = 0; size_t len = result.size; size_t encoded_len = base64_encoded_size(result.size, mode); - uint8_t* encoded_buf = reinterpret_cast( - ctx.method_allocator->allocate(encoded_len + 1)); + uint8_t* encoded_buf = + reinterpret_cast(method_allocator.allocate(encoded_len + 1)); if (encoded_buf != nullptr) { int ret = base64_encode( encoded_buf, (uint8_t*)result.buf, &encoded_len, &len, mode); @@ -956,10 +848,10 @@ int main(int argc, const char* argv[]) { #endif #if defined(ET_BUNDLE_IO) - if (ctx.bundle_io) { + if (bundle_io) { // Verify the result. status = executorch::bundled_program::verify_method_outputs( - *ctx.method.value(), model_pte, testset_idx, et_rtol, et_atol); + *method, model_pte, testset_idx, et_rtol, et_atol); if (status == Error::Ok) { ET_LOG(Info, "Model output match expected BundleIO bpte ref data."); ET_LOG(Info, "TEST: BundleIO index[%d] Test_result: PASS", testset_idx);