Skip to content
31 changes: 9 additions & 22 deletions backends/cuda/runtime/cuda_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,18 +286,6 @@ class ET_EXPERIMENTAL CudaBackend final
i);
}

// Clean up GPU tensors that we created (ExecuTorch tensors are always
// CPU, so all GPU tensors are our copies)
for (int i = 0; i < n_inputs; i++) {
// All GPU input tensors were created by us, delete them
aoti_torch_delete_tensor_object(gpu_inputs[i]);
}

for (int i = 0; i < n_outputs; i++) {
// All GPU output tensors were created by us, delete them
aoti_torch_delete_tensor_object(gpu_outputs[i]);
}

return Error::Ok;
}

Expand All @@ -318,16 +306,14 @@ class ET_EXPERIMENTAL CudaBackend final
handle->cuda_stream = nullptr;
}

// Delete the container BEFORE closing the shared library
if (handle->container_handle != nullptr) {
AOTIRuntimeError delete_result =
AOTInductorModelContainerDelete(handle->container_handle);
ET_CHECK_OR_LOG(
delete_result == Error::Ok,
"Failed to delete AOTInductorModelContainer with error code %d",
delete_result);
handle->container_handle = nullptr;
}
// We noticed that AOTInductorModelContainerDelete doesn't work well with
// multiple .so files when we tried to use it to delete the container handle,
// since freeing one of them will free some shared resources, leading to a
// segfault when trying to free the other .so files. Now we do not explicitly
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can let AI proofread these.

Suggested change
// segfault when trying to free the other .so files. Now we do not explicted
// segfault when trying to free the other .so files. Now we do not explicitly

// delete the container and defer to OS to handle them.
// TODO(gasoonjia): find a better and safer solution to delete the
// container.
// AOTInductorModelContainerDelete(handle->container_handle);

// Now close the shared library
if (handle->so_handle != nullptr) {
Expand All @@ -345,6 +331,7 @@ class ET_EXPERIMENTAL CudaBackend final
}

delete handle;
clear_all_tensors();
}
};

Expand Down
15 changes: 11 additions & 4 deletions backends/cuda/runtime/shims/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,14 +271,21 @@ void clear_all_tensors() {
// Use aoti_torch_delete_tensor_object to properly delete each tensor
// Note: We need to collect tensor pointers first since deletion modifies the
// set
auto old_tensors =
std::move(tensors); // tensors is now empty and no need to copy
for (const auto& tensor_shared : old_tensors) {
aoti_torch_delete_tensor_object(tensor_shared.get());
std::vector<Tensor*> tensor_ptrs;
tensor_ptrs.reserve(tensors.size());
for (const auto& tensor_shared : tensors) {
tensor_ptrs.push_back(tensor_shared.get());
}

// Now delete each tensor - this will modify the global tensors set
for (Tensor* tensor_ptr : tensor_ptrs) {
aoti_torch_delete_tensor_object(tensor_ptr);
}

// tensors set should now be empty, but ensure it's cleared
tensors.clear();

ET_LOG(Info, "Cleared all tensors");
}

AOTITorchError aoti_torch_delete_tensor_object(Tensor* tensor) {
Expand Down
Loading