Skip to content

Commit c141b0c

Browse files
committed
cudaMalloc -> cudaMallocAsync (and cudaFree -> cudaFreeAsync) to boost perf
1 parent 054f249 commit c141b0c

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

backends/cuda/runtime/slim/core/Storage.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,11 @@ template <> struct DeviceTraits<executorch::backends::cuda::c10::DeviceType::CUD
5858
// Allocates `nbytes` bytes of CUDA memory with `device` passed to a CUDAGuard,
// and returns the raw pointer written by the allocation call.
// NOTE(review): this is a diff rendering. The cudaMalloc statement is the
// removed ("61-") side and the cudaMallocAsync statement is the added ("61+")
// side; each revision of the file contains only one of them.
static void *allocate(size_t nbytes, const executorch::backends::cuda::c10::Device &device) {
5959
// Presumably makes `device` current for the rest of this scope; confirm against CUDAGuard.
executorch::backends::cuda::slim::cuda::CUDAGuard guard(device);
6060
void *data = nullptr;
61-
STANDALONE_CUDA_CHECK(cudaMalloc(&data, nbytes));
61+
// NOTE(review): the allocation is now stream-ordered on cudaStreamDefault.
// Callers that touch `data` from other streams may need their own ordering
// against the default stream; confirm with the call sites.
STANDALONE_CUDA_CHECK(cudaMallocAsync(&data, nbytes, cudaStreamDefault));
6262
return data;
6363
}
6464

65-
static void free(void *ptr) { STANDALONE_CUDA_CHECK_WARN(cudaFree(ptr)); }
65+
// Releases `ptr` with a stream-ordered free enqueued on cudaStreamDefault.
//
// A null `ptr` is ignored: the cudaFree call this replaced documents a null
// pointer as a no-op, whereas cudaFreeAsync documents no such case, so the
// guard keeps the earlier contract explicit.
//
// A failing CUDA call is routed through STANDALONE_CUDA_CHECK_WARN
// (presumably reports without aborting; confirm against the macro).
// NOTE(review): unlike allocate(), no CUDAGuard is created here, so the free
// is issued with whatever device is current. Confirm that is intended.
static void free(void *ptr) {
  if (ptr == nullptr) {
    return;
  }
  STANDALONE_CUDA_CHECK_WARN(cudaFreeAsync(ptr, cudaStreamDefault));
}
6666

6767
static void memcpy(void *dst, const void *src, size_t nbytes,
6868
const executorch::backends::cuda::c10::Device &dst_device,

0 commit comments

Comments
 (0)