66 * LICENSE file in the root directory of this source tree.
77 */
88
9+ #include < dlfcn.h>
910#include < executorch/runtime/backend/interface.h>
1011#include < executorch/runtime/core/error.h>
1112#include < executorch/runtime/core/evalue.h>
12-
13- #include < dlfcn.h>
13+ #include < executorch/runtime/core/exec_aten/util/tensor_util.h>
1414#include < unistd.h>
1515#include < cstdio>
1616
@@ -105,15 +105,29 @@ class CudaBackend final : public ::executorch::runtime::BackendInterface {
105105 FreeableBuffer* processed, // This will be an empty buffer
106106 ArrayRef<CompileSpec> compile_specs // This will be my empty list
107107 ) const override {
108- const NamedDataMap* named_data_map = context.get_named_data_map ();
109-
110- string so_blob_key = " so_blob" ;
111-
112- Result<FreeableBuffer> aoti_cuda_buffer =
113- named_data_map->get_data (so_blob_key.c_str ());
108+ std::string method_name;
109+ for (const CompileSpec& spec : compile_specs) {
110+ if (std::strcmp (spec.key , " method_name" ) == 0 ) {
111+ method_name.assign (
112+ static_cast <const char *>(spec.value .buffer ),
113+ spec.value .nbytes ); // no null-terminator guarantee, so pass size
114+ break ;
115+ }
116+ }
114117
115- ET_CHECK_OK_OR_RETURN_ERROR (aoti_cuda_buffer);
118+ std::string so_blob_key =
119+ method_name.empty () ? " so_blob" : method_name + " _so_blob" ;
116120
121+ const NamedDataMap* named_data_map = context.get_named_data_map ();
122+ auto aoti_cuda_buffer = named_data_map->get_data (so_blob_key.c_str ());
123+ if (!aoti_cuda_buffer.ok ()) {
124+ ET_LOG (
125+ Error,
126+ " Failed to get data for key %s: 0x%x" ,
127+ so_blob_key.c_str (),
128+ aoti_cuda_buffer.error ());
129+ return aoti_cuda_buffer.error ();
130+ }
117131 // Generate dynamic temporary file path
118132 filesystem::path temp_dir = filesystem::temp_directory_path ();
119133 filesystem::path so_path =
@@ -226,7 +240,7 @@ class CudaBackend final : public ::executorch::runtime::BackendInterface {
226240 return Error::Internal;
227241 }
228242 }
229-
243+ ET_LOG (Info, " Inputs copied to GPU " );
230244 // Process output tensors: create GPU counterparts for ExecuTorch CPU
231245 // tensors
232246 for (int i = 0 ; i < n_outputs; i++) {
@@ -255,7 +269,7 @@ class CudaBackend final : public ::executorch::runtime::BackendInterface {
255269
256270 gpu_outputs[i] = gpu_output_handle;
257271 }
258-
272+ ET_LOG (Info, " Outputs created on GPU " );
259273 // Run AOTI container with GPU tensors
260274 AOTIRuntimeError error = AOTInductorModelContainerRun (
261275 handle->container_handle ,
@@ -277,11 +291,15 @@ class CudaBackend final : public ::executorch::runtime::BackendInterface {
277291 // Copy GPU output results back to CPU output tensors
278292 for (int i = 0 ; i < n_outputs; i++) {
279293 auto cpu_output_tensor = &(args[i + n_inputs]->toTensor ());
280- Error copy_err = aoti_torch_copy_ (cpu_output_tensor, gpu_outputs[i], 0 );
281- if (copy_err != Error::Ok) {
282- ET_LOG (Error, " Failed to copy GPU output %d back to CPU" , i);
283- return Error::Internal;
284- }
294+ // For DYNAMIC_BOUND tensors we try to resize
295+ ET_CHECK_OK_OR_RETURN_ERROR (
296+ resize_tensor (*cpu_output_tensor, gpu_outputs[i]->sizes ()),
297+ " Error resizing tensor at output index %d" ,
298+ i);
299+ ET_CHECK_OK_OR_RETURN_ERROR (
300+ aoti_torch_copy_ (cpu_output_tensor, gpu_outputs[i], 0 ),
301+ " Failed to copy GPU output %d back to CPU" ,
302+ i);
285303 }
286304
287305 // Clean up GPU tensors that we created (ExecuTorch tensors are always
0 commit comments