@@ -47,7 +47,7 @@ Init() {
4747 if (!get_device_count ()) return ;
4848 checkCudaErrors (cudaStreamCreateWithFlags (&stream, cudaStreamNonBlocking));
4949 checkCudaErrors (cudaEventCreate (&event, cudaEventDisableTiming));
50- stream = aclstream;
50+ // stream = aclstream;
5151}
5252~Init () {
5353 if (!get_device_count ()) return ;
@@ -123,23 +123,23 @@ void FetchOp::run() {
123123 new (&allocation) Allocation (&cuda_dual_allocator, v->size );
124124 // mostly device to device
125125 #if IS_CUDA
126- // checkCudaErrors(cudaMemcpyAsync(
127- // allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
128126 checkCudaErrors (cudaMemcpyAsync (
129- allocation.ptr , v->size , v->mem_ptr , v->size , cudaMemcpyDefault, aclstream));
130- checkCudaErrors (aclrtSynchronizeStream (aclstream));
127+ allocation.ptr , v->mem_ptr , v->size , cudaMemcpyDefault, stream));
128+ // checkCudaErrors(cudaMemcpyAsync(
129+ // allocation.ptr, v->size, v->mem_ptr, v->size, cudaMemcpyDefault, aclstream));
130+ // checkCudaErrors(aclrtSynchronizeStream(aclstream));
131131 #else
132132 checkCudaErrors (cudaMemcpyAsync (
133133 allocation.ptr , v->mem_ptr , v->size , cudaMemcpyDeviceToDevice, stream));
134134 #endif
135135 auto host_ptr = cuda_dual_allocator.get_dual_allocation (
136136 allocation.allocation ).host_ptr ;
137137 // device to host
138- // checkCudaErrors(cudaMemcpyAsync(
139- // host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream));
140- checkCudaErrors (aclrtMemcpyAsync (
141- host_ptr, v->size , allocation.ptr , v->size , cudaMemcpyDeviceToHost, aclstream));
142- checkCudaErrors (aclrtSynchronizeStream (aclstream));
138+ checkCudaErrors (cudaMemcpyAsync (
139+ host_ptr, allocation.ptr , v->size , cudaMemcpyDeviceToHost, stream));
140+ // checkCudaErrors(aclrtMemcpyAsync(
141+ // host_ptr, v->size, allocation.ptr, v->size, cudaMemcpyDeviceToHost, aclstream));
142+ // checkCudaErrors(aclrtSynchronizeStream(aclstream));
143143 allocation.ptr = host_ptr;
144144 has_cuda_memcpy = true ;
145145 } else
0 commit comments