@@ -64,7 +64,7 @@ static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 [[noreturn]]
 void ggml_cuda_error(const char * stmt, const char * func, const char * file, int line, const char * msg) {
     int id = -1; // in case cudaGetDevice fails
-    (void) cudaGetDevice(&id);
+    cudaGetDevice(&id);
 
     GGML_LOG_ERROR(GGML_CUDA_NAME " error: %s\n", msg);
     GGML_LOG_ERROR("  current device: %d, in function %s at %s:%d\n", id, func, file, line);
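Note on the hunk above: `cudaGetDevice` is called best-effort here, with `id = -1` as the failure sentinel, which is why its return status can be discarded. A minimal sketch of that idiom, using a hypothetical helper name (`current_device_or` is not part of the patch):

// Hypothetical helper showing the best-effort device query: the status is
// deliberately ignored and the sentinel value reports failure instead.
static int current_device_or(int fallback) {
    int id = fallback;
    (void) cudaGetDevice(&id); // id keeps the fallback value if the call fails
    return id;
}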
@@ -155,7 +155,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
@@ -167,7 +167,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
             alloc_prop.location.id = id;
             CU_CHECK(cuMemGetAllocationGranularity(&info.devices[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
         }
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
         info.devices[id].vmm = !!device_vmm;
 
         cudaDeviceProp prop;
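For context, VMM support is a per-device capability that has to be probed through the CUDA driver API, as the hunks above do. A self-contained sketch of the same probe (assumes `cuInit(0)` has already run; `device_supports_vmm` is an illustrative name, not part of the patch):

#include <cuda.h>

// Returns true when the device can back a pool with cuMemCreate/cuMemMap.
static bool device_supports_vmm(int id) {
    CUdevice dev;
    int vmm = 0;
    if (cuDeviceGet(&dev, id) != CUDA_SUCCESS) {
        return false;
    }
    if (cuDeviceGetAttribute(&vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, dev) != CUDA_SUCCESS) {
        return false;
    }
    return vmm != 0;
}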
@@ -301,7 +301,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
 
@@ -310,9 +310,6 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     size_t pool_used = 0;
     size_t pool_size = 0;
     size_t granularity;
-#if defined(GGML_USE_HIP)
-    std::vector<std::pair<CUdeviceptr, size_t>> mappings;
-#endif
 
     explicit ggml_cuda_pool_vmm(int device) :
         device(device),
@@ -321,14 +318,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 
     ~ggml_cuda_pool_vmm() {
         if (pool_addr != 0) {
-#if defined(GGML_USE_HIP)
-            // Workaround for https://github.com/ROCm/ROCR-Runtime/issues/285
-            for (std::pair<CUdeviceptr, size_t> & mapping : mappings) {
-                CU_CHECK(cuMemUnmap(mapping.first, mapping.second));
-            }
-#else
             CU_CHECK(cuMemUnmap(pool_addr, pool_size));
-#endif
             CU_CHECK(cuMemAddressFree(pool_addr, CUDA_POOL_VMM_MAX_SIZE));
         }
     }
@@ -361,11 +351,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         }
 
         // map at the end of the pool
-        CUdeviceptr start_ptr = (CUdeviceptr)((char *)(pool_addr) + pool_size);
-        CU_CHECK(cuMemMap(start_ptr, reserve_size, 0, handle, 0));
-#if defined(GGML_USE_HIP)
-        mappings.push_back({start_ptr, reserve_size});
-#endif
+        CU_CHECK(cuMemMap(pool_addr + pool_size, reserve_size, 0, handle, 0));
 
         // the memory allocation handle is no longer needed after mapping
         CU_CHECK(cuMemRelease(handle));
@@ -375,7 +361,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
         access.location.id = device;
         access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
-        CU_CHECK(cuMemSetAccess((CUdeviceptr)((char *)(pool_addr) + pool_size), reserve_size, &access, 1));
+        CU_CHECK(cuMemSetAccess(pool_addr + pool_size, reserve_size, &access, 1));
 
         // add to the pool
         pool_size += reserve_size;
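Taken together, the grow path in the hunks above follows the standard CUDA VMM recipe: reserve a large virtual address range once, then on each growth create physical backing, map it at the current end of the pool, and grant the device read/write access. A condensed sketch (error checks and granularity rounding omitted; `base`, `device`, `reserve_size`, and `pool_size` stand in for the pool's members):

// One-time reservation of the virtual range (done in the constructor):
CUdeviceptr base;
cuMemAddressReserve(&base, CUDA_POOL_VMM_MAX_SIZE, 0, 0, 0);

// Per-growth: physical allocation, mapping, and access flags.
CUmemAllocationProp prop = {};
prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
prop.location.id   = device;

CUmemGenericAllocationHandle handle;
cuMemCreate(&handle, reserve_size, &prop, 0);            // physical backing
cuMemMap(base + pool_size, reserve_size, 0, handle, 0);  // map at pool end
cuMemRelease(handle);                                    // mapping keeps it alive

CUmemAccessDesc access = {};
access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
access.location.id   = device;
access.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
cuMemSetAccess(base + pool_size, reserve_size, &access, 1);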
@@ -387,7 +373,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 
         GGML_ASSERT(pool_addr != 0);
 
-        void * ptr = (void *) ((CUdeviceptr)((char *)(pool_addr) + pool_used));
+        void * ptr = (void *) (pool_addr + pool_used);
         *actual_size = size;
         pool_used += size;
 
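The casts removed in this hunk were only needed while the pool also compiled under HIP, where the device pointer type is a `void *`; on CUDA, `CUdeviceptr` is an unsigned integer type, so byte offsets work directly. A two-line sketch of why the plain arithmetic is safe (needs <type_traits>):

static_assert(std::is_unsigned<CUdeviceptr>::value, "CUdeviceptr is an integer type");
void * ptr = (void *) (pool_addr + pool_used); // plain byte-offset arithmetic, no casts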
@@ -406,17 +392,17 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         pool_used -= size;
 
         // all deallocations must be in reverse order of the allocations
-        GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
+        GGML_ASSERT(ptr == (void *) (pool_addr + pool_used));
     }
 };
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
     return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
 }
 
@@ -562,7 +548,7 @@ static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffer(ggml_bac
     cudaError_t err = ggml_cuda_device_malloc(&dev_ptr, size, buft_ctx->device);
     if (err != cudaSuccess) {
         // clear the error
-        (void) cudaGetLastError();
+        cudaGetLastError();
         GGML_LOG_ERROR("%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size / 1024.0 / 1024.0, buft_ctx->device, cudaGetErrorString(err));
         return nullptr;
     }
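The `cudaGetLastError()` call in this error path is not cosmetic: the CUDA runtime keeps a last-error value, and reading it through `cudaGetLastError` is what resets it, so a later unrelated `CUDA_CHECK` does not trip over this allocation failure. A minimal sketch of the clear-and-fall-back pattern (the same idiom recurs in the hunks below):

void * p = nullptr;
if (cudaMalloc(&p, size) != cudaSuccess) {
    cudaGetLastError(); // returns AND resets the runtime's last-error state
    p = nullptr;        // caller can now fall back or report cleanly
}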
@@ -977,7 +963,7 @@ static void * ggml_cuda_host_malloc(size_t size) {
     cudaError_t err = cudaMallocHost((void **) &ptr, size);
     if (err != cudaSuccess) {
         // clear the error
-        (void) cudaGetLastError();
+        cudaGetLastError();
         GGML_LOG_DEBUG("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
                        size / 1024.0 / 1024.0, cudaGetErrorString(err));
         return nullptr;
@@ -1224,15 +1210,15 @@ static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) {
                 CUDA_CHECK(err);
             } else {
                 // reset the error
-                (void) cudaGetLastError();
+                cudaGetLastError();
             }
         } else {
             cudaError_t err = cudaDeviceDisablePeerAccess(id_other);
             if (err != cudaErrorPeerAccessNotEnabled) {
                 CUDA_CHECK(err);
             } else {
                 // reset the error
-                (void) cudaGetLastError();
+                cudaGetLastError();
             }
         }
     }
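Same idiom as above, specialized for peer access: enabling an already-enabled peer returns `cudaErrorPeerAccessAlreadyEnabled`, and disabling a never-enabled one returns `cudaErrorPeerAccessNotEnabled`; both are benign here and only require resetting the error state. A sketch of the tolerant toggle (assumes devices `id` and `id_other` exist and `CUDA_CHECK` aborts on real errors):

CUDA_CHECK(cudaSetDevice(id));
cudaError_t err = cudaDeviceEnablePeerAccess(id_other, 0); // flags must be 0
if (err == cudaErrorPeerAccessAlreadyEnabled) {
    cudaGetLastError(); // already enabled: clear the benign error and move on
} else {
    CUDA_CHECK(err);
}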
@@ -2471,7 +2457,7 @@ static void maintain_cuda_graph(ggml_backend_cuda_context * cuda_ctx, std::vecto
         if (stat == cudaErrorInvalidDeviceFunction) {
             // Fails due to incorrect handling by CUDA runtime of CUDA BLAS node.
             // We don't need to update blas nodes, so clear error and move on.
-            (void) cudaGetLastError();
+            cudaGetLastError();
         } else {
             GGML_ASSERT(stat == cudaSuccess);
         }
@@ -2526,20 +2512,14 @@ static bool is_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx,
 static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
 
     cudaGraphExecUpdateResultInfo result_info;
-#ifdef __HIP_PLATFORM_AMD__
-    hipGraphNode_t errorNode;
-    hipError_t stat = hipGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &errorNode, &result_info);
-#else
     cudaError_t stat = cudaGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &result_info);
-#endif
     if (stat == cudaErrorGraphExecUpdateFailure) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: CUDA graph update failed\n", __func__);
 #endif
-
         // The pre-existing graph exec cannot be updated due to violated constraints
         // so instead clear error and re-instantiate
-        (void) cudaGetLastError();
+        cudaGetLastError();
         CUDA_CHECK(cudaGraphExecDestroy(cuda_ctx->cuda_graph->instance));
         cuda_ctx->cuda_graph->instance = nullptr;
         CUDA_CHECK(cudaGraphInstantiate(&cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, NULL, NULL, 0));
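The control flow in this hunk is the usual CUDA graphs fallback: `cudaGraphExecUpdate` tries to patch the instantiated executable in place, and when the new graph violates the update constraints (e.g. topology changes) it reports `cudaErrorGraphExecUpdateFailure`, at which point the remaining option is to destroy and re-instantiate. Condensed sketch (`instance` and `graph` stand in for the context's cuda_graph members):

cudaGraphExecUpdateResultInfo info;
if (cudaGraphExecUpdate(instance, graph, &info) == cudaErrorGraphExecUpdateFailure) {
    cudaGetLastError();                          // clear the sticky error
    CUDA_CHECK(cudaGraphExecDestroy(instance));  // drop the old executable
    instance = nullptr;
    CUDA_CHECK(cudaGraphInstantiate(&instance, graph, NULL, NULL, 0));
}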
@@ -2767,7 +2747,7 @@ bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size) {
     cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterPortable | cudaHostRegisterReadOnly);
     if (err != cudaSuccess) {
         // clear the error
-        (void) cudaGetLastError();
+        cudaGetLastError();
 
         GGML_LOG_DEBUG("%s: failed to register %.2f MiB of pinned memory: %s\n", __func__,
                        size / 1024.0 / 1024.0, cudaGetErrorString(err));
@@ -2787,7 +2767,7 @@ void ggml_backend_cuda_unregister_host_buffer(void * buffer) {
     cudaError_t err = cudaHostUnregister(buffer);
     if (err != cudaSuccess) {
         // clear the error
-        (void) cudaGetLastError();
+        cudaGetLastError();
     }
 }
 
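These last two hunks touch the pin/unpin pair: `cudaHostRegister` page-locks an existing host buffer so the GPU can DMA from it directly, and failure is deliberately non-fatal since transfers still work (more slowly) from pageable memory. A usage sketch of the pair (`buf` and `size` are illustrative):

// Pin an existing buffer for faster transfers; fall back silently when
// registration is unsupported (e.g. some integrated or virtualized setups).
if (cudaHostRegister(buf, size, cudaHostRegisterPortable | cudaHostRegisterReadOnly) == cudaSuccess) {
    // ... launch copies/kernels reading buf ...
    CUDA_CHECK(cudaHostUnregister(buf)); // undo the pinning when done
} else {
    cudaGetLastError(); // clear the error; copies stay pageable
}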