@@ -260,8 +260,8 @@ int cuspread3d_blockgather_prop(int nf1, int nf2, int nf3, int M,
260260 cudaMemcpyDeviceToHost, stream))))
261261 return ier;
262262 cudaStreamSynchronize (stream);
263- if ((ier = checkCudaErrors (
264- cudaMallocAsync (&d_idxnupts, totalNUpts * sizeof ( int ), stream))))
263+ if ((ier = checkCudaErrors (cudaMallocWrapper (&d_idxnupts, totalNUpts * sizeof ( int ),
264+ stream, d_plan-> supports_pools ))))
265265 return ier;
266266
267267 calc_inverse_of_global_sort_index_ghost<<<(M + 1024 - 1 ) / 1024 , 1024 , 0 , stream>>> (
@@ -320,7 +320,8 @@ int cuspread3d_blockgather_prop(int nf1, int nf2, int nf3, int M,
320320 return ier;
321321 cudaStreamSynchronize (stream);
322322 if ((ier = checkCudaErrors (
323- cudaMallocAsync (&d_subprob_to_bin, totalnumsubprob * sizeof (int ), stream))))
323+ cudaMallocWrapper (&d_subprob_to_bin, totalnumsubprob * sizeof (int ), stream,
324+ d_plan->supports_pools ))))
324325 return ier;
325326 map_b_into_subprob_3d_v1<<<(n + 1024 - 1 ) / 1024 , 1024 , 0 , stream>>> (
326327 d_subprob_to_bin, d_subprobstartpts, d_numsubprob, n);
@@ -474,8 +475,8 @@ int cuspread3d_subprob_prop(int nf1, int nf2, int nf3, int M,
474475 sizeof (int ), cudaMemcpyDeviceToHost, stream)))
475476 return FINUFFT_ERR_CUDA_FAILURE;
476477 cudaStreamSynchronize (stream);
477- if (checkCudaErrors (
478- cudaMallocAsync (&d_subprob_to_bin, totalnumsubprob * sizeof ( int ), stream)))
478+ if (checkCudaErrors (cudaMallocWrapper (&d_subprob_to_bin, totalnumsubprob * sizeof ( int ),
479+ stream, d_plan-> supports_pools )))
479480 return FINUFFT_ERR_CUDA_FAILURE;
480481
481482 map_b_into_subprob_3d_v2<<<(numbins[0 ] * numbins[1 ] + 1024 - 1 ) / 1024 , 1024 , 0 ,
0 commit comments