Skip to content

Commit c838eee

Browse files
committed
Resize tensor
1 parent 4222fe6 commit c838eee

File tree

1 file changed

+34
-16
lines changed

1 file changed

+34
-16
lines changed

backends/cuda/runtime/cuda_backend.cpp

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <dlfcn.h>
910
#include <executorch/runtime/backend/interface.h>
1011
#include <executorch/runtime/core/error.h>
1112
#include <executorch/runtime/core/evalue.h>
12-
13-
#include <dlfcn.h>
13+
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
1414
#include <unistd.h>
1515
#include <cstdio>
1616

@@ -105,15 +105,29 @@ class CudaBackend final : public ::executorch::runtime::BackendInterface {
105105
FreeableBuffer* processed, // This will be an empty buffer
106106
ArrayRef<CompileSpec> compile_specs // This will be an empty list
107107
) const override {
108-
const NamedDataMap* named_data_map = context.get_named_data_map();
109-
110-
string so_blob_key = "so_blob";
111-
112-
Result<FreeableBuffer> aoti_cuda_buffer =
113-
named_data_map->get_data(so_blob_key.c_str());
108+
std::string method_name;
109+
for (const CompileSpec& spec : compile_specs) {
110+
if (std::strcmp(spec.key, "method_name") == 0) {
111+
method_name.assign(
112+
static_cast<const char*>(spec.value.buffer),
113+
spec.value.nbytes); // no nullptr guarantee, so pass size
114+
break;
115+
}
116+
}
114117

115-
ET_CHECK_OK_OR_RETURN_ERROR(aoti_cuda_buffer);
118+
std::string so_blob_key =
119+
method_name.empty() ? "so_blob" : method_name + "_so_blob";
116120

121+
const NamedDataMap* named_data_map = context.get_named_data_map();
122+
auto aoti_cuda_buffer = named_data_map->get_data(so_blob_key.c_str());
123+
if (!aoti_cuda_buffer.ok()) {
124+
ET_LOG(
125+
Error,
126+
"Failed to get data for key %s: 0x%x",
127+
so_blob_key.c_str(),
128+
aoti_cuda_buffer.error());
129+
return aoti_cuda_buffer.error();
130+
}
117131
// Generate dynamic temporary file path
118132
filesystem::path temp_dir = filesystem::temp_directory_path();
119133
filesystem::path so_path =
@@ -226,7 +240,7 @@ class CudaBackend final : public ::executorch::runtime::BackendInterface {
226240
return Error::Internal;
227241
}
228242
}
229-
243+
ET_LOG(Info, "Inputs copied to GPU");
230244
// Process output tensors: create GPU counterparts for ExecutorTorch CPU
231245
// tensors
232246
for (int i = 0; i < n_outputs; i++) {
@@ -255,7 +269,7 @@ class CudaBackend final : public ::executorch::runtime::BackendInterface {
255269

256270
gpu_outputs[i] = gpu_output_handle;
257271
}
258-
272+
ET_LOG(Info, "Outputs created on GPU");
259273
// Run AOTI container with GPU tensors
260274
AOTIRuntimeError error = AOTInductorModelContainerRun(
261275
handle->container_handle,
@@ -277,11 +291,15 @@ class CudaBackend final : public ::executorch::runtime::BackendInterface {
277291
// Copy GPU output results back to CPU output tensors
278292
for (int i = 0; i < n_outputs; i++) {
279293
auto cpu_output_tensor = &(args[i + n_inputs]->toTensor());
280-
Error copy_err = aoti_torch_copy_(cpu_output_tensor, gpu_outputs[i], 0);
281-
if (copy_err != Error::Ok) {
282-
ET_LOG(Error, "Failed to copy GPU output %d back to CPU", i);
283-
return Error::Internal;
284-
}
294+
// For DYNAMIC_BOUND tensors we try to resize
295+
ET_CHECK_OK_OR_RETURN_ERROR(
296+
resize_tensor(*cpu_output_tensor, gpu_outputs[i]->sizes()),
297+
"Error resizing tensor at output index %d",
298+
i);
299+
ET_CHECK_OK_OR_RETURN_ERROR(
300+
aoti_torch_copy_(cpu_output_tensor, gpu_outputs[i], 0),
301+
"Failed to copy GPU output %d back to CPU",
302+
i);
285303
}
286304

287305
// Clean up GPU tensors that we created (ExecutorTorch tensors are always

0 commit comments

Comments
 (0)