#include <vector>

#include <executorch/extension/data_loader/buffer_data_loader.h>
- #include <executorch/runtime/executor/executor.h>
+ #include <executorch/runtime/executor/method.h>
+ #include <executorch/runtime/executor/program.h>
#include <executorch/runtime/platform/log.h>
#include <executorch/runtime/platform/runtime.h>
#include <executorch/util/read_file.h>
@@ -30,7 +31,7 @@ using namespace torch::executor;
* For ExecuTorch to work efficiently in these environments, we want to
* initialize the execution plan once for the model and avoid
* re-initializing it for every inference. This can be achieved by restricting
- * the runtime contexts (torch::executor::Program and torch::executor::Executor)
+ * the runtime contexts (torch::executor::Program and torch::executor::Method)
* to live in a pre-allocated, shared, and persistent memory.
*
* This tool demonstrates that the memory can be managed this way.
@@ -123,7 +124,7 @@ MemoryManager* create_memory_manager(
ET_CHECK(temp_allocator != nullptr);
new (temp_allocator) MemoryAllocator(0, nullptr);

- // Assemble all of the allocators into the MemoryManager that the Executor
+ // Assemble all of the allocators into the MemoryManager that the Method
// will use.
auto* memory_manager = worker_allocator.allocateInstance<MemoryManager>();
ET_CHECK(memory_manager != nullptr);
@@ -133,7 +134,7 @@ MemoryManager* create_memory_manager(
return memory_manager;
}

- ExecutionPlan* init_method(
+ Method* init_method(
Program* program,
const char* method_name,
MemoryAllocator& worker_allocator,
@@ -143,47 +144,46 @@ ExecutionPlan* init_method(
create_memory_manager(program, method_name, worker_allocator);

//
- // Create an Executor and ExecutionPlan from the program, using the provided
- // allocators. The ExecutionPlan is what actually runs the model. It is
+ // Create and load a method from the program, using the provided
+ // allocators. The Method is what actually runs the model. It is
// mutable, so should only be used by a single thread at a time, but it can
// be reused.
//

- auto* executor = worker_allocator.allocateInstance<Executor>();
- ET_CHECK(executor != nullptr);
- new (executor) Executor(program, memory_manager);
-
- Error status = executor->init_execution_plan(method_name);
+ auto* method = worker_allocator.allocateInstance<Method>();
+ ET_CHECK(method != nullptr);
+ auto method_res = program->load_method(method_name, memory_manager);
ET_CHECK_MSG(
- status == Error::Ok,
- "init_execution_plan('%s') failed with status 0x%" PRIx32,
+ method_res.error() == Error::Ok,
+ "loading method ('%s') failed with status 0x%" PRIx32,
method_name,
- status);
+ method_res.error());
+ new (method) Method(std::move(method_res.get()));
+
ET_LOG(Info, "Model method '%s' initialized.", method_name);
- auto& plan = executor->execution_plan();

// Gather the byte size of each input/output tensor.
- const size_t input_size = plan.inputs_size();
+ const size_t input_size = method->inputs_size();
for (size_t i = 0; i < input_size; i++) {
- if (!plan.get_input(i).isTensor()) {
+ if (!method->get_input(i).isTensor()) {
ET_LOG(Info, "input %zu is not a tensor, skipping", i);
continue;
}
- const auto& t = plan.get_input(i).toTensor();
+ const auto& t = method->get_input(i).toTensor();
input_sizes.push_back(t.nbytes());
}

- const size_t output_size = plan.outputs_size();
+ const size_t output_size = method->outputs_size();
for (size_t i = 0; i < output_size; i++) {
- const auto& t = plan.get_output(i).toTensor();
+ const auto& t = method->get_output(i).toTensor();
output_sizes.push_back(t.nbytes());
}

- return &plan;
+ return method;
}

void inference_loop(
- ExecutionPlan* plan,
+ Method* method,
const std::vector<void*>& input_buffers,
const std::vector<void*>& output_buffers) {
ET_LOG(
@@ -194,12 +194,12 @@ void inference_loop(
// Prepare the inputs.
{
size_t bufi = 0;
- for (size_t i = 0; i < plan->inputs_size(); i++) {
- if (!plan->get_input(i).isTensor()) {
+ for (size_t i = 0; i < method->inputs_size(); i++) {
+ if (!method->get_input(i).isTensor()) {
ET_LOG(Info, "input %zu is not a tensor, skipping", i);
continue;
}
- const auto& t = plan->get_input(i).toTensor();
+ const auto& t = method->get_input(i).toTensor();
ET_CHECK_MSG(
bufi < input_buffers.size(), "Not enough input buffers for model");
t.set_data(input_buffers[bufi++]);
@@ -210,12 +210,12 @@ void inference_loop(
// Prepare the outputs.
{
size_t bufi = 0;
- for (size_t i = 0; i < plan->outputs_size(); i++) {
- if (!plan->get_output(i).isTensor()) {
+ for (size_t i = 0; i < method->outputs_size(); i++) {
+ if (!method->get_output(i).isTensor()) {
ET_LOG(Info, "output %zu is not a tensor, skipping", i);
continue;
}
- const auto& t = plan->get_output(i).toTensor();
+ const auto& t = method->get_output(i).toTensor();
ET_CHECK_MSG(
bufi < output_buffers.size(), "Not enough output buffers for model");
t.set_data(output_buffers[bufi++]);
@@ -224,7 +224,7 @@ void inference_loop(
ET_LOG(Info, "Outputs prepared.");

// Run the model.
- Error status = plan->execute();
+ Error status = method->execute();
ET_CHECK_MSG(
status == Error::Ok,
"plan->execute() failed with status 0x%" PRIx32,
@@ -275,10 +275,10 @@ int main(int argc, char** argv) {
ET_CHECK(program != nullptr);

/*
- * Step 4: The worker core sets up the Executor and initalizes the execution
- * plan. Here we let the control core read out the I/O info from the
- * execution plan. This can also be done on the control core from the
- * program flatbuffer, though there is no direct API at the moment.
+ * Step 4: The worker core sets up the Method. Here we let the control
+ * core read out the I/O info from the Method. This can also be done on
+ * the control core from the program flatbuffer, though there is no
+ * direct API at the moment.
*/

// Get the method name to execute.
@@ -295,7 +295,7 @@ int main(int argc, char** argv) {
std::vector<size_t> input_sizes;
std::vector<size_t> output_sizes;

- ExecutionPlan* plan = worker::init_method(
+ Method* method = worker::init_method(
program, method_name, worker_allocator, input_sizes, output_sizes);

ET_LOG(
@@ -331,7 +331,7 @@ int main(int argc, char** argv) {
*/

// Run the inference on the inputs. CHECK-fails on error.
- worker::inference_loop(plan, input_buffers, output_buffers);
+ worker::inference_loop(method, input_buffers, output_buffers);

for (void* buffer : input_buffers) {
free(buffer);
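
To make the migration easier to follow, here is a condensed, illustrative sketch (not part of the commit) of the post-change flow: load the Method once into persistent memory via Program::load_method(), then reuse it for every inference. It assumes this file's includes, `using namespace torch::executor`, and the `program`, `memory_manager`, and `worker_allocator` setup shown above; `load_once` and `run_inference` are hypothetical helper names, not functions from the commit.

// Illustrative sketch only; mirrors the added lines in the diff above.
Method* load_once(
    Program* program,
    const char* method_name,
    MemoryAllocator& worker_allocator,
    MemoryManager* memory_manager) {
  // Program::load_method() replaces the old Executor/init_execution_plan() pair.
  auto method_res = program->load_method(method_name, memory_manager);
  ET_CHECK(method_res.error() == Error::Ok);

  // Keep the Method in the persistent worker allocator so it can be reused
  // across inferences without re-initialization.
  auto* method = worker_allocator.allocateInstance<Method>();
  ET_CHECK(method != nullptr);
  new (method) Method(std::move(method_res.get()));
  return method;
}

Error run_inference(
    Method* method,
    const std::vector<void*>& input_buffers,
    const std::vector<void*>& output_buffers) {
  // Point each tensor input/output at caller-owned memory, then execute.
  size_t bufi = 0;
  for (size_t i = 0; i < method->inputs_size(); i++) {
    if (!method->get_input(i).isTensor()) {
      continue;  // non-tensor inputs are skipped, as in inference_loop()
    }
    const auto& t = method->get_input(i).toTensor();
    t.set_data(input_buffers[bufi++]);
  }
  bufi = 0;
  for (size_t i = 0; i < method->outputs_size(); i++) {
    if (!method->get_output(i).isTensor()) {
      continue;
    }
    const auto& t = method->get_output(i).toTensor();
    t.set_data(output_buffers[bufi++]);
  }
  return method->execute();  // the same Method can be executed again later
}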