diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp
index e0d35fb82a8..cacf5924b46 100644
--- a/examples/arm/executor_runner/arm_executor_runner.cpp
+++ b/examples/arm/executor_runner/arm_executor_runner.cpp
@@ -224,65 +224,6 @@ void et_pal_free(ET_UNUSED void* ptr) {}
 
 namespace {
 
-/// Lightweight heapless container that constructs and stores a T in-place.
-template <typename T>
-class Box {
- public:
-  Box() = default;
-
-  ~Box() {
-    if (has_value) {
-      ptr()->~T();
-    }
-  }
-
-  Box(const Box&) = delete;
-  Box& operator=(const Box&) = delete;
-
-  /// Destructs the already contained object if it's present and initialize a
-  /// new contained object while forwarding its constructor arguments.
-  template <typename... Args>
-  void reset(Args&&... args) {
-    if (has_value) {
-      // Destroy the already contained object.
-      reinterpret_cast<T*>(mem)->~T();
-    }
-    // Init the new object.
-    new (mem) T(std::forward<Args>(args)...);
-    has_value = true;
-  }
-
-  /// Returns a reference to the contained object.
-  T& value() {
-    return *ptr();
-  }
-
-  /// Returns a const reference to the contained object.
-  const T& value() const {
-    return *ptr();
-  }
-
-  T* operator->() {
-    return ptr();
-  }
-
-  const T* operator->() const {
-    return ptr();
-  }
-
- private:
-  alignas(T) uint8_t mem[sizeof(T)];
-  bool has_value = false;
-
-  T* ptr() {
-    return reinterpret_cast<T*>(mem);
-  }
-
-  const T* ptr() const {
-    return reinterpret_cast<const T*>(mem);
-  }
-};
-
 // Setup our own allocator that can show some extra stuff like used and free
 // memory info
 class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator {
@@ -338,7 +279,7 @@ class ArmMemoryAllocator : public executorch::runtime::MemoryAllocator {
 Result<BufferCleanup> prepare_input_tensors(
     Method& method,
     MemoryAllocator& allocator,
-    const std::vector<std::pair<char*, size_t>>& input_buffers) {
+    std::vector<std::pair<char*, size_t>>& input_buffers) {
   MethodMeta method_meta = method.method_meta();
   size_t num_inputs = method_meta.num_inputs();
   size_t num_allocated = 0;
@@ -401,7 +342,7 @@ Result<BufferCleanup> prepare_input_tensors(
             tensor_meta.get().dim_order().data()));
     Tensor t(&impl);
 
-    // If input_buffers.size <= 0, we don't have any input, fill it with 1's.
+    // If input_buffers is empty we have no input data, so fill t with 1's.
     if (input_buffers.size() <= 0) {
       for (size_t j = 0; j < t.numel(); j++) {
         switch (t.scalar_type()) {
@@ -469,51 +410,99 @@ std::pair<char*, size_t> read_binary_file(
 }
 #endif
 
-/// Holds all state needed for setup and run phases
-struct RunnerContext {
-  RunnerContext() = default;
-  RunnerContext(const RunnerContext& ctx) = delete;
-  RunnerContext& operator=(const RunnerContext& ctx) = delete;
+} // namespace
 
-  const char* method_name = nullptr;
-  size_t planned_buffer_memsize = 0;
-  size_t method_loaded_memsize = 0;
-  size_t executor_membase = 0;
-  size_t program_data_len = 0;
-  size_t input_memsize = 0;
-  size_t pte_size = 0;
-  bool bundle_io = false;
-  Box<Result<BufferCleanup>> prepared_inputs;
-  Box<ArmMemoryAllocator> method_allocator;
-  Box<ArmMemoryAllocator> temp_allocator;
-  Box<Result<Method>> method;
-#if defined(ET_EVENT_TRACER_ENABLED)
-  Box<torch::executor::ETDumpGen> etdump_gen;
+int main(int argc, const char* argv[]) {
+#if defined(SEMIHOSTING)
+  ET_LOG(Info, "Running executor with parameter:");
+  if (argc < 7) {
+    ET_LOG(Fatal, "Not right number of parameters!");
+    ET_LOG(
+        Fatal,
+        "app -m model.pte -i input.bin [-i input2.bin] -o output_basename");
+    ET_LOG(Fatal, "Exiting!");
+    _exit(1);
+  }
+  ET_LOG(Info, "   %s", argv[0]);
+  for (int i = 1; i < argc; i += 2) { // Log arguments as flag/value pairs.
+    ET_LOG(Info, "   %s %s", argv[i], (i + 1 < argc) ? argv[i + 1] : "");
+  }
+#else
+  (void)argc;
+  (void)argv;
 #endif
 
-  /// Runs the loaded method and returns the status
-  Error run();
-};
+  executorch::runtime::runtime_init();
+  std::vector<std::pair<char*, size_t>> input_buffers;
+  size_t pte_size = sizeof(model_pte);
+
+#if defined(SEMIHOSTING)
+  const char* output_basename = nullptr;
+  ArmMemoryAllocator input_file_allocator(
+      input_file_allocation_pool_size, input_file_allocation_pool);
+
+  // Parse "-i <file>", "-m <file>", "-o <basename>" pairs that follow argv[0].
+  size_t nbr_inputs = 0; // Running count of -i files, kept across iterations.
+  for (int i = 1; i + 1 < argc; i++) { // Each flag needs a value.
+    if (std::strcmp(argv[i], "-i") == 0) {
+      // Input tensor file: read its contents into memory.
+      const char* input_tensor_filename = argv[++i];
+      ET_LOG(
+          Info,
+          "Reading input tensor %zu from file %s",
+          ++nbr_inputs,
+          input_tensor_filename);
+      auto [buffer, buffer_size] =
+          read_binary_file(input_tensor_filename, input_file_allocator);
+      if (buffer == nullptr) {
+        ET_LOG(
+            Error,
+            "Reading input tensor %zu from file %s ERROR Out of memory",
+            nbr_inputs,
+            input_tensor_filename);
+        _exit(1);
+      }
+      input_buffers.push_back(std::make_pair(buffer, buffer_size));
+    } else if (std::strcmp(argv[i], "-m") == 0) {
+      const char* pte_filename = argv[++i];
+      ET_LOG(Info, "Reading pte model from file %s", pte_filename);
+      auto [buffer, buffer_size] =
+          read_binary_file(pte_filename, input_file_allocator);
+      if (buffer == nullptr) {
+        ET_LOG(
+            Error,
+            "Reading pte model from file %s ERROR Out of memory",
+            pte_filename);
+        _exit(1);
+      }
+
+      // Store the model data with the same variable as if it was loaded
+      // from compiled in location.
+      model_pte = buffer;
+      pte_size = buffer_size;
+    } else if (std::strcmp(argv[i], "-o") == 0) {
+      // store the base filename to write output to.
+      output_basename = argv[++i];
+    }
+  }
+#endif
+  ET_LOG(
+      Info, "PTE in %p %c Size: %lu bytes", model_pte, model_pte[0], pte_size);
 
-void runner_init(
-    RunnerContext& ctx,
-    std::vector<std::pair<char*, size_t>> input_buffers,
-    size_t pte_size) {
   // Find the offset to the embedded Program.
   const void* program_data = model_pte;
-  ctx.program_data_len = pte_size;
-  ctx.pte_size = pte_size;
+  size_t program_data_len = pte_size;
 
 #if defined(ET_BUNDLE_IO)
-  ctx.bundle_io = executorch::bundled_program::is_bundled_program(
-      reinterpret_cast<void*>(model_pte), ctx.pte_size);
-  if (ctx.bundle_io) {
+  bool bundle_io = executorch::bundled_program::is_bundled_program(
+      reinterpret_cast<void*>(model_pte), pte_size);
+  if (bundle_io) {
     // BundleIO bpte is provided, dig out the actual model from the data area
     Error status = executorch::bundled_program::get_program_data(
         reinterpret_cast<void*>(model_pte),
-        ctx.pte_size,
+        pte_size,
         &program_data,
-        &ctx.program_data_len);
+        &program_data_len);
 
     ET_CHECK_MSG(
         status == Error::Ok,
@@ -521,8 +510,8 @@ void runner_init(
         (unsigned int)status);
   }
 #endif
-  auto loader = BufferDataLoader(program_data, ctx.program_data_len);
-  ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", ctx.program_data_len);
+  auto loader = BufferDataLoader(program_data, program_data_len);
+  ET_LOG(Info, "PTE Model data loaded. Size: %lu bytes.", program_data_len);
 
   // Parse the program file. This is immutable, and can also be reused
   // between multiple execution invocations across multiple threads.
@@ -537,19 +526,20 @@ void runner_init(
 
   ET_LOG(Info, "Model buffer loaded, has %lu methods", program->num_methods());
 
+  const char* method_name = nullptr;
   {
     const auto method_name_result = program->get_method_name(0);
     ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
-    ctx.method_name = *method_name_result;
+    method_name = *method_name_result;
   }
-  ET_LOG(Info, "Running method %s", ctx.method_name);
+  ET_LOG(Info, "Running method %s", method_name);
 
-  Result<MethodMeta> method_meta = program->method_meta(ctx.method_name);
+  Result<MethodMeta> method_meta = program->method_meta(method_name);
   if (!method_meta.ok()) {
     ET_LOG(
         Info,
         "Failed to get method_meta for %s: 0x%x",
-        ctx.method_name,
+        method_name,
         (unsigned int)method_meta.error());
   }
 
@@ -558,14 +548,14 @@ void runner_init(
       "Setup Method allocator pool. Size: %lu bytes.",
       method_allocation_pool_size);
 
-  ctx.method_allocator.reset(
+  ArmMemoryAllocator method_allocator(
       method_allocation_pool_size, method_allocation_pool);
 
   std::vector<uint8_t*> planned_buffers; // Owns the memory
   std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
   size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
 
-  size_t planned_buffer_membase = ctx.method_allocator->used_size();
+  size_t planned_buffer_membase = method_allocator.used_size();
 
   for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
     size_t buffer_size =
@@ -574,7 +564,7 @@ void runner_init(
 
     /* Move to it's own allocator when MemoryPlanner is in place. */
     uint8_t* buffer =
-        reinterpret_cast<uint8_t*>(ctx.method_allocator->allocate(buffer_size));
+        reinterpret_cast<uint8_t*>(method_allocator.allocate(buffer_size));
     ET_CHECK_MSG(
         buffer != nullptr,
         "Could not allocate memory for memory planned buffer size %zu",
@@ -583,53 +573,52 @@ void runner_init(
     planned_spans.push_back({planned_buffers.back(), buffer_size});
   }
 
-  ctx.planned_buffer_memsize =
-      ctx.method_allocator->used_size() - planned_buffer_membase;
+  size_t planned_buffer_memsize =
+      method_allocator.used_size() - planned_buffer_membase;
 
   HierarchicalAllocator planned_memory(
       {planned_spans.data(), planned_spans.size()});
 
-  ctx.temp_allocator.reset(temp_allocation_pool_size, temp_allocation_pool);
+  ArmMemoryAllocator temp_allocator(
+      temp_allocation_pool_size, temp_allocation_pool);
 
   MemoryManager memory_manager(
-      &ctx.method_allocator.value(),
-      &planned_memory,
-      &ctx.temp_allocator.value());
+      &method_allocator, &planned_memory, &temp_allocator);
 
-  size_t method_loaded_membase = ctx.method_allocator->used_size();
+  size_t method_loaded_membase = method_allocator.used_size();
 
   executorch::runtime::EventTracer* event_tracer_ptr = nullptr;
 
 #if defined(ET_EVENT_TRACER_ENABLED)
   ET_LOG(Info, "Setting up ETDump");
-  ctx.etdump_gen.reset();
-  event_tracer_ptr = &ctx.etdump_gen.value();
+  torch::executor::ETDumpGen etdump_gen = torch::executor::ETDumpGen();
+  event_tracer_ptr = &etdump_gen;
 #endif
 
-  ctx.method.reset(
-      program->load_method(ctx.method_name, &memory_manager, event_tracer_ptr));
+  Result<Method> method =
+      program->load_method(method_name, &memory_manager, event_tracer_ptr);
 
-  if (!ctx.method->ok()) {
+  if (!method.ok()) {
     ET_LOG(
         Info,
         "Loading of method %s failed with status 0x%" PRIx32,
-        ctx.method_name,
-        ctx.method->error());
+        method_name,
+        method.error());
   }
-  ctx.method_loaded_memsize =
-      ctx.method_allocator->used_size() - method_loaded_membase;
-  ET_LOG(Info, "Method '%s' loaded.", ctx.method_name);
+  size_t method_loaded_memsize =
+      method_allocator.used_size() - method_loaded_membase;
+  ET_LOG(Info, "Method '%s' loaded.", method_name);
 
   ET_LOG(Info, "Preparing inputs...");
-  size_t input_membase = ctx.method_allocator->used_size();
+  size_t input_membase = method_allocator.used_size();
 
 #if defined(ET_BUNDLE_IO)
-  if (ctx.bundle_io) {
+  if (bundle_io) {
     // Get inputs from bundled IO ".bpte" data
     // Useful for testing
     ET_LOG(Info, "Input testset[%d] from bundled bpte", testset_idx);
     Error status = executorch::bundled_program::load_bundled_input(
-        *ctx.method.value(), model_pte, testset_idx);
+        *method, model_pte, testset_idx);
     ET_CHECK_MSG(
         status == Error::Ok,
         "load_bundled_input failed with status 0x%" PRIx32,
@@ -641,22 +630,22 @@ void runner_init(
     // Get inputs from SEMIHOSTING or fake it with a lot of "1"
     // Use "static" to force to compiler to remove this when it goes out of
     // scope
-    ctx.prepared_inputs.reset(::prepare_input_tensors(
-        *ctx.method.value(), ctx.method_allocator.value(), input_buffers));
+    static auto prepared_inputs =
+        ::prepare_input_tensors(*method, method_allocator, input_buffers);
 
-    if (!ctx.prepared_inputs->ok()) {
+    if (!prepared_inputs.ok()) {
       ET_LOG(
           Info,
           "Preparing inputs tensors for method %s failed with status 0x%" PRIx32,
-          ctx.method_name,
-          ctx.prepared_inputs->error());
+          method_name,
+          prepared_inputs.error());
     }
   }
 #if defined(ET_DUMP_INPUT)
   {
-    std::vector<EValue> inputs((*ctx.method.value())->inputs_size());
+    std::vector<EValue> inputs(method->inputs_size());
     ET_LOG(Info, "%zu inputs: ", inputs.size());
-    Error status = ctx.method.value()->get_inputs(inputs.data(), inputs.size());
+    Error status = method->get_inputs(inputs.data(), inputs.size());
     ET_CHECK(status == Error::Ok);
 
     for (int i = 0; i < inputs.size(); ++i) {
@@ -698,121 +687,19 @@ void runner_init(
     }
   }
 #endif
-  ctx.input_memsize = ctx.method_allocator->used_size() - input_membase;
-  ctx.executor_membase = ctx.method_allocator->used_size();
-
+  size_t input_memsize = method_allocator.used_size() - input_membase;
   ET_LOG(Info, "Input prepared.");
-}
 
-Error RunnerContext::run() {
   ET_LOG(Info, "Starting the model execution...");
-
+  size_t executor_membase = method_allocator.used_size();
   StartMeasurements();
   // Run the model.
-  Error status = method.value()->execute();
+  Error status = method->execute();
   StopMeasurements();
+  size_t executor_memsize = method_allocator.used_size() - executor_membase;
 
-  return status;
-}
-
-} // namespace
-
-int main(int argc, const char* argv[]) {
-#if defined(SEMIHOSTING)
-  ET_LOG(Info, "Running executor with parameter:");
-  if (argc < 7) {
-    ET_LOG(Fatal, "Not right number of parameters!");
-    ET_LOG(
-        Fatal,
-        "app -m model.pte -i input.bin [-i input2.bin] -o output_basename");
-    ET_LOG(Fatal, "Exiting!");
-    _exit(1);
-  }
-  ET_LOG(Info, "   %s", argv[0]);
-  for (int i = 1; i < argc; i++) {
-    ET_LOG(Info, "   %s %s", argv[i], argv[++i]);
-  }
-#else
-  (void)argc;
-  (void)argv;
-#endif
-
-  executorch::runtime::runtime_init();
-  std::vector<std::pair<char*, size_t>> input_buffers;
-  size_t pte_size = sizeof(model_pte);
-
-#if defined(SEMIHOSTING)
-  const char* output_basename = nullptr;
-  ArmMemoryAllocator input_file_allocator(
-      input_file_allocation_pool_size, input_file_allocation_pool);
-
-  /* parse input parameters */
-  for (int i = 0; i < argc; i++) {
-    size_t nbr_inputs = 0;
-    if (std::strcmp(argv[i], "-i") == 0) {
-      // input file, read the data into memory
-      const char* input_tensor_filename = argv[++i];
-      ET_LOG(
-          Info,
-          "Reading input tensor %d from file %s",
-          ++nbr_inputs,
-          input_tensor_filename);
-      auto [buffer, buffer_size] =
-          read_binary_file(input_tensor_filename, input_file_allocator);
-      if (buffer == nullptr) {
-        ET_LOG(
-            Error,
-            "Reading input tensor %d from file %s ERROR Out of memory",
-            nbr_inputs,
-            input_tensor_filename);
-        _exit(1);
-      }
-      input_buffers.push_back(std::make_pair(buffer, buffer_size));
-    } else if (std::strcmp(argv[i], "-m") == 0) {
-      const char* pte_filename = argv[++i];
-      ET_LOG(Info, "Reading pte model from file %s", pte_filename);
-      auto [buffer, buffer_size] =
-          read_binary_file(pte_filename, input_file_allocator);
-      if (buffer == nullptr) {
-        ET_LOG(
-            Error,
-            "Reading pte model from file %s ERROR Out of memory",
-            pte_filename);
-        _exit(1);
-      }
-
-      // Store the model data with the same variable as if it was loaded
-      // from compiled in location.
-      model_pte = buffer;
-      pte_size = buffer_size;
-    } else if (std::strcmp(argv[i], "-o") == 0) {
-      // store the base filename to write output to.
-      output_basename = argv[++i];
-    }
-  }
-#endif
-  ET_LOG(
-      Info, "PTE in %p %c Size: %lu bytes", model_pte, model_pte[0], pte_size);
-
-  RunnerContext ctx;
-  runner_init(ctx, input_buffers, pte_size);
-
-  Error status = ctx.run();
-  if (status != Error::Ok) {
-    ET_LOG(
-        Info,
-        "Execution of method %s failed with status 0x%" PRIx32,
-        ctx.method_name,
-        status);
-  } else {
-    ET_LOG(Info, "Model executed successfully.");
-  }
-
-  size_t executor_memsize =
-      ctx.method_allocator->used_size() - ctx.executor_membase;
-
-  ET_LOG(Info, "model_pte_program_size:     %lu bytes.", ctx.program_data_len);
-  ET_LOG(Info, "model_pte_loaded_size:      %lu bytes.", ctx.pte_size);
+  ET_LOG(Info, "model_pte_program_size:     %lu bytes.", program_data_len);
+  ET_LOG(Info, "model_pte_loaded_size:      %lu bytes.", pte_size);
 #if defined(SEMIHOSTING)
   if (input_file_allocator.size() > 0) {
     ET_LOG(
@@ -824,39 +711,44 @@ int main(int argc, const char* argv[]) {
         100 * input_file_allocator.used_size() / input_file_allocator.size());
   }
 #endif
-  if (ctx.method_allocator->size() != 0) {
-    size_t method_allocator_used = ctx.method_allocator->used_size();
+  if (method_allocator.size() != 0) {
+    size_t method_allocator_used = method_allocator.used_size();
     ET_LOG(
         Info,
         "method_allocator_used:     %zu / %zu  free: %zu ( used: %zu %% ) ",
         method_allocator_used,
-        ctx.method_allocator->size(),
-        ctx.method_allocator->free_size(),
-        100 * method_allocator_used / ctx.method_allocator->size());
-    ET_LOG(
-        Info,
-        "method_allocator_planned:  %zu bytes",
-        ctx.planned_buffer_memsize);
+        method_allocator.size(),
+        method_allocator.free_size(),
+        100 * method_allocator_used / method_allocator.size());
     ET_LOG(
-        Info,
-        "method_allocator_loaded:   %zu bytes",
-        ctx.method_loaded_memsize);
-    ET_LOG(Info, "method_allocator_input:    %zu bytes", ctx.input_memsize);
+        Info, "method_allocator_planned:  %zu bytes", planned_buffer_memsize);
+    ET_LOG(Info, "method_allocator_loaded:   %zu bytes", method_loaded_memsize);
+    ET_LOG(Info, "method_allocator_input:    %zu bytes", input_memsize);
     ET_LOG(Info, "method_allocator_executor: %zu bytes", executor_memsize);
   }
-  if (ctx.temp_allocator->size() > 0) {
+  if (temp_allocator.size() > 0) {
     ET_LOG(
         Info,
         "peak_temp_allocator:       %zu / %zu free: %zu ( used: %zu %% ) ",
-        ctx.temp_allocator->peak_used(),
-        ctx.temp_allocator->size(),
-        ctx.temp_allocator->free_size(),
-        100 * ctx.temp_allocator->peak_used() / ctx.temp_allocator->size());
+        temp_allocator.peak_used(),
+        temp_allocator.size(),
+        temp_allocator.free_size(),
+        100 * temp_allocator.peak_used() / temp_allocator.size());
+  }
+
+  if (status != Error::Ok) {
+    ET_LOG(
+        Info,
+        "Execution of method %s failed with status 0x%" PRIx32,
+        method_name,
+        status);
+  } else {
+    ET_LOG(Info, "Model executed successfully.");
   }
 
-  std::vector<EValue> outputs(ctx.method.value()->outputs_size());
+  std::vector<EValue> outputs(method->outputs_size());
   ET_LOG(Info, "%zu outputs: ", outputs.size());
-  status = ctx.method.value()->get_outputs(outputs.data(), outputs.size());
+  status = method->get_outputs(outputs.data(), outputs.size());
   ET_CHECK(status == Error::Ok);
 
   // Print the outputs.
@@ -914,15 +806,15 @@ int main(int argc, const char* argv[]) {
 #if !defined(SEMIHOSTING)
   // Dump the etdump data containing profiling/debugging data to the serial line
   // base64 encoded
-  ETDumpResult result = ctx.etdump_gen->get_etdump_data();
+  ETDumpResult result = etdump_gen.get_etdump_data();
   if (result.buf != nullptr && result.size > 0) {
     // On a device with no file system we can't just write it out
     // to the file-system so we base64 encode it and dump it on the log.
     int mode = 0;
     size_t len = result.size;
     size_t encoded_len = base64_encoded_size(result.size, mode);
-    uint8_t* encoded_buf = reinterpret_cast<uint8_t*>(
-        ctx.method_allocator->allocate(encoded_len + 1));
+    uint8_t* encoded_buf =
+        reinterpret_cast<uint8_t*>(method_allocator.allocate(encoded_len + 1));
     if (encoded_buf != nullptr) {
       int ret = base64_encode(
           encoded_buf, (uint8_t*)result.buf, &encoded_len, &len, mode);
@@ -956,10 +848,10 @@ int main(int argc, const char* argv[]) {
 #endif
 
 #if defined(ET_BUNDLE_IO)
-  if (ctx.bundle_io) {
+  if (bundle_io) {
     // Verify the result.
     status = executorch::bundled_program::verify_method_outputs(
-        *ctx.method.value(), model_pte, testset_idx, et_rtol, et_atol);
+        *method, model_pte, testset_idx, et_rtol, et_atol);
     if (status == Error::Ok) {
       ET_LOG(Info, "Model output match expected BundleIO bpte ref data.");
       ET_LOG(Info, "TEST: BundleIO index[%d] Test_result: PASS", testset_idx);