Skip to content

Commit 1e2ee90

Browse files
committed
[slimtensor] Add aoti_torch_copy_ for SlimTensor
Add a SlimTensor-based `aoti_torch_copy_()`, which copies data from a source tensor to a destination tensor. It delegates to SlimTensor's `copy_()`, which handles all device combinations (CPU-CPU, CPU-CUDA, CUDA-CPU, CUDA-CUDA). Differential Revision: [D90126246](https://our.internmc.facebook.com/intern/diff/D90126246/) [ghstack-poisoned]
1 parent 092b946 commit 1e2ee90

File tree

4 files changed

+523
-0
lines changed

4 files changed

+523
-0
lines changed

backends/cuda/runtime/shims/memory_slim.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,26 @@ AOTITorchError aoti_torch__reinterpret_tensor(
186186
return Error::Ok;
187187
}
188188

189+
/**
 * Copies the contents of `src` into `self`.
 *
 * Both pointers are null-checked before any work is done; the copy itself is
 * delegated entirely to the destination tensor's copy_() member.
 *
 * @param self Destination tensor; must not be null.
 * @param src Source tensor; must not be null.
 * @param non_blocking Accepted for interface compatibility; not used here.
 * @return Error::Ok after copy_() has been invoked. A null `self` or `src`
 *     is reported through ET_CHECK_OR_RETURN_ERROR as InvalidArgument.
 */
AOTITorchError
aoti_torch_copy_(Tensor* self, Tensor* src, int32_t non_blocking) {
  // The flag is deliberately discarded: SlimTensor::copy_() is always
  // synchronous for now, so there is nothing to forward it to.
  static_cast<void>(non_blocking);

  // Reject null handles up front, destination first and then source.
  ET_CHECK_OR_RETURN_ERROR(
      self != nullptr, InvalidArgument, "aoti_torch_copy_: self is null");
  ET_CHECK_OR_RETURN_ERROR(
      src != nullptr, InvalidArgument, "aoti_torch_copy_: src is null");

  // No extra validation is done here. SlimTensor::copy_() is relied on for:
  //   - checking that both tensors have the same numel
  //   - checking that both tensors have the same dtype
  //   - CPU-CPU, CPU-CUDA, CUDA-CPU and CUDA-CUDA transfers
  //   - the contiguous fast path and the non-contiguous element-wise copy
  Tensor& destination = *self;
  destination.copy_(*src);

  return Error::Ok;
}
208+
189209
} // extern "C"
190210

191211
} // namespace executorch::backends::cuda

backends/cuda/runtime/shims/memory_slim.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,21 @@ AOTI_SHIM_EXPORT AOTITorchError aoti_torch__reinterpret_tensor(
128128
int64_t storage_offset,
129129
Tensor** ret_new_tensor);
130130

131+
/**
132+
* Copies data from source tensor to destination tensor.
133+
*
134+
* Handles all device combinations (CPU-CPU, CPU-CUDA, CUDA-CPU, CUDA-CUDA)
135+
* and supports tensors with different strides. The destination tensor must
136+
* already be allocated with sufficient storage.
137+
*
138+
* @param self Destination tensor (must not be null)
139+
* @param src Source tensor to copy from (must not be null)
140+
* @param non_blocking If true, the copy may be asynchronous (currently ignored)
141+
* @return AOTITorchError error code (Error::Ok on success)
142+
*/
143+
AOTI_SHIM_EXPORT AOTITorchError
144+
aoti_torch_copy_(Tensor* self, Tensor* src, int32_t non_blocking);
145+
131146
} // extern "C"
132147

133148
} // namespace executorch::backends::cuda

backends/cuda/runtime/shims/tests/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,4 @@ def define_common_targets():
7676
cuda_shim_slim_cpp_unittest("aoti_torch_delete_tensor_object")
7777
cuda_shim_slim_cpp_unittest("aoti_torch_new_tensor_handle")
7878
cuda_shim_slim_cpp_unittest("aoti_torch__reinterpret_tensor")
79+
cuda_shim_slim_cpp_unittest("aoti_torch_copy_")

0 commit comments

Comments
 (0)