
Commit 527808e

Revert 11362
1 parent c3a6af3 commit 527808e

3 files changed: +20, -83 lines

ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 1 deletion

@@ -588,7 +588,7 @@ struct ggml_tensor_extra_gpu {
 };
 
 
-#if ((CUDART_VERSION >= 12000) && defined(GGML_CUDA_USE_GRAPHS)) || defined(GGML_HIP_GRAPHS)
+#if (CUDART_VERSION >= 12000) && defined(GGML_CUDA_USE_GRAPHS)
 #define USE_CUDA_GRAPH
 #endif
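After this revert, USE_CUDA_GRAPH is defined only when building against CUDA 12.0 or newer with GGML_CUDA_USE_GRAPHS; GGML_HIP_GRAPHS no longer turns it on. For readers unfamiliar with the macro, here is a minimal sketch of the stream-capture pattern that USE_CUDA_GRAPH gates elsewhere in the backend (illustrative only, not code from this commit; run_graph_once is a hypothetical helper, and CUDA_CHECK is the repo's error-checking macro):

#include <cuda_runtime.h>

#ifdef USE_CUDA_GRAPH
// Capture a stream's work into a graph once, then instantiate and replay it.
static void run_graph_once(cudaStream_t stream) {
    cudaGraph_t     graph;
    cudaGraphExec_t instance;

    CUDA_CHECK(cudaStreamBeginCapture(stream, cudaStreamCaptureModeRelaxed));
    // ... enqueue the kernels to be replayed on `stream` here ...
    CUDA_CHECK(cudaStreamEndCapture(stream, &graph));

    // same instantiate call the backend uses (see the graph hunks below)
    CUDA_CHECK(cudaGraphInstantiate(&instance, graph, NULL, NULL, 0));
    CUDA_CHECK(cudaGraphLaunch(instance, stream));
    CUDA_CHECK(cudaStreamSynchronize(stream));

    CUDA_CHECK(cudaGraphExecDestroy(instance));
    CUDA_CHECK(cudaGraphDestroy(graph));
}
#endif // USE_CUDA_GRAPH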

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 19 additions & 39 deletions

@@ -64,7 +64,7 @@ static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 [[noreturn]]
 void ggml_cuda_error(const char * stmt, const char * func, const char * file, int line, const char * msg) {
     int id = -1; // in case cudaGetDevice fails
-    (void)cudaGetDevice(&id);
+    cudaGetDevice(&id);
 
     GGML_LOG_ERROR(GGML_CUDA_NAME " error: %s\n", msg);
     GGML_LOG_ERROR("  current device: %d, in function %s at %s:%d\n", id, func, file, line);
@@ -155,7 +155,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));

@@ -167,7 +167,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
         alloc_prop.location.id = id;
         CU_CHECK(cuMemGetAllocationGranularity(&info.devices[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
     }
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
     info.devices[id].vmm = !!device_vmm;
 
     cudaDeviceProp prop;
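After the revert this capability probe runs only in CUDA builds: cuDeviceGetAttribute() with CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED reports whether the device and driver support the cuMem* virtual memory API, and the result later selects the VMM pool. A standalone sketch of the same probe (illustrative; assumes cuInit() has run, error handling omitted):

#include <cuda.h>

// Hypothetical standalone version of the probe in ggml_cuda_init above.
static bool device_supports_vmm(int id) {
    CUdevice dev;
    int vmm = 0;
    cuDeviceGet(&dev, id);
    cuDeviceGetAttribute(&vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, dev);
    return vmm != 0;
}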
@@ -301,7 +301,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB

@@ -310,9 +310,6 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     size_t pool_used = 0;
     size_t pool_size = 0;
     size_t granularity;
-#if defined(GGML_USE_HIP)
-    std::vector<std::pair<CUdeviceptr, size_t>> mappings;
-#endif
 
     explicit ggml_cuda_pool_vmm(int device) :
         device(device),

@@ -321,14 +318,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 
     ~ggml_cuda_pool_vmm() {
         if (pool_addr != 0) {
-#if defined(GGML_USE_HIP)
-            // Workaround for https://github.com/ROCm/ROCR-Runtime/issues/285
-            for (std::pair<CUdeviceptr, size_t> & mapping : mappings) {
-                CU_CHECK(cuMemUnmap(mapping.first, mapping.second));
-            }
-#else
             CU_CHECK(cuMemUnmap(pool_addr, pool_size));
-#endif
             CU_CHECK(cuMemAddressFree(pool_addr, CUDA_POOL_VMM_MAX_SIZE));
         }
     }

@@ -361,11 +351,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         }
 
         // map at the end of the pool
-        CUdeviceptr start_ptr = (CUdeviceptr)((char *)(pool_addr) + pool_size);
-        CU_CHECK(cuMemMap(start_ptr, reserve_size, 0, handle, 0));
-#if defined(GGML_USE_HIP)
-        mappings.push_back({start_ptr, reserve_size});
-#endif
+        CU_CHECK(cuMemMap(pool_addr + pool_size, reserve_size, 0, handle, 0));
 
         // the memory allocation handle is no longer needed after mapping
         CU_CHECK(cuMemRelease(handle));

@@ -375,7 +361,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
         access.location.id = device;
         access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
-        CU_CHECK(cuMemSetAccess((CUdeviceptr)((char *)(pool_addr) + pool_size), reserve_size, &access, 1));
+        CU_CHECK(cuMemSetAccess(pool_addr + pool_size, reserve_size, &access, 1));
 
         // add to the pool
         pool_size += reserve_size;

@@ -387,7 +373,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 
         GGML_ASSERT(pool_addr != 0);
 
-        void * ptr = (void *) ((CUdeviceptr)((char *)(pool_addr) + pool_used));
+        void * ptr = (void *) (pool_addr + pool_used);
         *actual_size = size;
         pool_used += size;

@@ -406,17 +392,17 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         pool_used -= size;
 
         // all deallocations must be in reverse order of the allocations
-        GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
+        GGML_ASSERT(ptr == (void *) (pool_addr + pool_used));
     }
 };
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_CUDA_NO_VMM)
+#if !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // !defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)
     return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
 }
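Context for the pool hunks above: the VMM pool reserves one large virtual address range up front and grows it by mapping physical allocations at its end, which is why plain pointer arithmetic on pool_addr suffices once the HIP-specific mappings bookkeeping is removed. A condensed, illustrative sketch of that grow path (driver API; no error handling, device 0 assumed, reserve_size assumed already padded to the allocation granularity):

#include <cuda.h>

// Condensed sketch of the grow path used by ggml_cuda_pool_vmm above.
static CUdeviceptr pool_addr = 0;
static size_t      pool_size = 0;

static void grow_pool(size_t reserve_size) {
    if (pool_addr == 0) {
        // reserve virtual address space once (32 GB max, as in the pool)
        cuMemAddressReserve(&pool_addr, 1ull << 35, 0, 0, 0);
    }

    // create a physical allocation and map it at the end of the pool
    CUmemAllocationProp prop = {};
    prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    prop.location.id   = 0;

    CUmemGenericAllocationHandle handle;
    cuMemCreate(&handle, reserve_size, &prop, 0);
    cuMemMap(pool_addr + pool_size, reserve_size, 0, handle, 0);
    cuMemRelease(handle); // the handle is not needed once mapped

    // make the new range readable/writable from device 0
    CUmemAccessDesc access = {};
    access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    access.location.id   = 0;
    access.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    cuMemSetAccess(pool_addr + pool_size, reserve_size, &access, 1);

    pool_size += reserve_size;
}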

@@ -562,7 +548,7 @@ static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffer(ggml_bac
     cudaError_t err = ggml_cuda_device_malloc(&dev_ptr, size, buft_ctx->device);
     if (err != cudaSuccess) {
         // clear the error
-        (void)cudaGetLastError();
+        cudaGetLastError();
         GGML_LOG_ERROR("%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size / 1024.0 / 1024.0, buft_ctx->device, cudaGetErrorString(err));
         return nullptr;
     }

@@ -977,7 +963,7 @@ static void * ggml_cuda_host_malloc(size_t size) {
     cudaError_t err = cudaMallocHost((void **) &ptr, size);
     if (err != cudaSuccess) {
         // clear the error
-        (void)cudaGetLastError();
+        cudaGetLastError();
         GGML_LOG_DEBUG("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
                        size / 1024.0 / 1024.0, cudaGetErrorString(err));
         return nullptr;

@@ -1224,15 +1210,15 @@ static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) {
                 CUDA_CHECK(err);
             } else {
                 // reset the error
-                (void)cudaGetLastError();
+                cudaGetLastError();
             }
         } else {
             cudaError_t err = cudaDeviceDisablePeerAccess(id_other);
             if (err != cudaErrorPeerAccessNotEnabled) {
                 CUDA_CHECK(err);
             } else {
                 // reset the error
-                (void)cudaGetLastError();
+                cudaGetLastError();
             }
         }
     }
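A note on the pattern that recurs in these hunks: the CUDA runtime's last-error state is sticky, and cudaGetLastError() both returns and clears it, so calling it after an expected, recoverable failure prevents the stale error from tripping a later CUDA_CHECK. A minimal sketch with hypothetical fallback logic, not code from the file:

#include <cuda_runtime.h>
#include <cstdio>

// Hypothetical allocation-with-fallback showing why the code above calls
// cudaGetLastError() after a recoverable failure.
static void * alloc_or_null(size_t size) {
    void * ptr = nullptr;
    cudaError_t err = cudaMalloc(&ptr, size);
    if (err != cudaSuccess) {
        cudaGetLastError(); // clear the sticky error, then fall back gracefully
        fprintf(stderr, "alloc of %zu bytes failed: %s\n", size, cudaGetErrorString(err));
        return nullptr;
    }
    return ptr;
}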
@@ -2471,7 +2457,7 @@ static void maintain_cuda_graph(ggml_backend_cuda_context * cuda_ctx, std::vecto
         if (stat == cudaErrorInvalidDeviceFunction) {
             // Fails due to incorrect handling by CUDA runtime of CUDA BLAS node.
             // We don't need to update blas nodes, so clear error and move on.
-            (void)cudaGetLastError();
+            cudaGetLastError();
         } else {
             GGML_ASSERT(stat == cudaSuccess);
         }
@@ -2526,20 +2512,14 @@ static bool is_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx,
 static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
 
     cudaGraphExecUpdateResultInfo result_info;
-#ifdef __HIP_PLATFORM_AMD__
-    hipGraphNode_t errorNode;
-    hipError_t stat = hipGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &errorNode, &result_info);
-#else
     cudaError_t stat = cudaGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &result_info);
-#endif
     if (stat == cudaErrorGraphExecUpdateFailure) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: CUDA graph update failed\n", __func__);
 #endif
-
         // The pre-existing graph exec cannot be updated due to violated constraints
         // so instead clear error and re-instantiate
-        (void)cudaGetLastError();
+        cudaGetLastError();
         CUDA_CHECK(cudaGraphExecDestroy(cuda_ctx->cuda_graph->instance));
         cuda_ctx->cuda_graph->instance = nullptr;
         CUDA_CHECK(cudaGraphInstantiate(&cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, NULL, NULL, 0));
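The retained control flow is the usual CUDA graphs idiom: attempt an in-place cudaGraphExecUpdate(), and only on cudaErrorGraphExecUpdateFailure clear the error, destroy the executable, and re-instantiate; the revert simply drops the HIP-specific four-argument hipGraphExecUpdate variant. Condensed into a sketch (refresh_graph is a hypothetical helper; error checks omitted):

#include <cuda_runtime.h>

// Illustrative update-or-reinstantiate idiom mirroring the code above.
static void refresh_graph(cudaGraphExec_t & instance, cudaGraph_t graph, cudaStream_t stream) {
    cudaGraphExecUpdateResultInfo result_info;
    cudaError_t stat = cudaGraphExecUpdate(instance, graph, &result_info);
    if (stat == cudaErrorGraphExecUpdateFailure) {
        cudaGetLastError(); // clear the sticky failure before retrying
        cudaGraphExecDestroy(instance);
        cudaGraphInstantiate(&instance, graph, NULL, NULL, 0);
    }
    cudaGraphLaunch(instance, stream);
}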
@@ -2767,7 +2747,7 @@ bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size) {
     cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterPortable | cudaHostRegisterReadOnly);
     if (err != cudaSuccess) {
         // clear the error
-        (void)cudaGetLastError();
+        cudaGetLastError();
 
         GGML_LOG_DEBUG("%s: failed to register %.2f MiB of pinned memory: %s\n", __func__,
                        size / 1024.0 / 1024.0, cudaGetErrorString(err));

@@ -2787,7 +2767,7 @@ void ggml_backend_cuda_unregister_host_buffer(void * buffer) {
     cudaError_t err = cudaHostUnregister(buffer);
     if (err != cudaSuccess) {
         // clear the error
-        (void)cudaGetLastError();
+        cudaGetLastError();
     }
 }

ggml/src/ggml-cuda/vendors/hip.h

Lines changed: 0 additions & 43 deletions

@@ -19,12 +19,6 @@
 #define CUBLAS_TF32_TENSOR_OP_MATH 0
 #define CUDA_R_16F HIPBLAS_R_16F
 #define CUDA_R_32F HIPBLAS_R_32F
-#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
-#define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
-#define CU_MEM_ALLOCATION_TYPE_PINNED hipMemAllocationTypePinned
-#define CU_MEM_LOCATION_TYPE_DEVICE hipMemLocationTypeDevice
-#define CU_MEM_ACCESS_FLAGS_PROT_READWRITE hipMemAccessFlagsProtReadWrite
-#define CU_CHECK(fn) {hipError_t err = fn; if(err != hipSuccess) { GGML_ABORT("HipVMM Failure: %s\n", hipGetErrorString(err)); }}
 #define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
 #define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
 #define cublasCreate hipblasCreate
@@ -80,50 +74,13 @@
 #define cudaMemGetInfo hipMemGetInfo
 #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
 #define cudaSetDevice hipSetDevice
-#define cuDeviceGet hipDeviceGet
-#define CUdevice hipDevice_t
-#define CUdeviceptr hipDeviceptr_t
-#define cuMemUnmap hipMemUnmap
-#define CUmemAccessDesc hipMemAccessDesc
-#define cuMemAddressFree hipMemAddressFree
-#define cuMemRelease hipMemRelease
-#define CUmemGenericAllocationHandle hipMemGenericAllocationHandle_t
-#define cuMemCreate hipMemCreate
-#define cuMemAddressReserve hipMemAddressReserve
-#define cuMemMap hipMemMap
-#define cuMemSetAccess hipMemSetAccess
-#define cuMemGetAllocationGranularity hipMemGetAllocationGranularity
-#define CUmemAllocationProp hipMemAllocationProp
-#define cuDeviceGetAttribute hipDeviceGetAttribute
 #define cudaStreamCreateWithFlags hipStreamCreateWithFlags
 #define cudaStreamDestroy hipStreamDestroy
 #define cudaStreamFireAndForget hipStreamFireAndForget
 #define cudaStreamNonBlocking hipStreamNonBlocking
 #define cudaStreamPerThread hipStreamPerThread
 #define cudaStreamSynchronize hipStreamSynchronize
 #define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
-#define cudaGraphExec_t hipGraphExec_t
-#define cudaGraphNode_t hipGraphNode_t
-#define cudaKernelNodeParams hipKernelNodeParams
-#define cudaKernelNodeParams hipKernelNodeParams
-#define cudaGraphExecDestroy hipGraphExecDestroy
-#define cudaGraphLaunch hipGraphLaunch
-#define cudaErrorGraphExecUpdateFailure hipErrorGraphExecUpdateFailure
-#define cudaGraphExecUpdateResultInfo hipGraphExecUpdateResult
-#define cudaGraphNodeType hipGraphNodeType
-#define cudaGraphNodeTypeKernel hipGraphNodeTypeKernel
-#define cudaGraphInstantiate hipGraphInstantiate
-#define cudaStreamEndCapture hipStreamEndCapture
-#define cudaGraphDestroy hipGraphDestroy
-#define cudaGraphKernelNodeSetParams hipGraphKernelNodeSetParams
-#define cudaErrorInvalidDeviceFunction hipErrorInvalidDeviceFunction
-#define cudaGraphKernelNodeGetParams hipGraphKernelNodeGetParams
-#define cudaGraphNodeGetType hipGraphNodeGetType
-#define cudaGraphGetNodes hipGraphGetNodes
-#define cudaGraphExecUpdate hipGraphExecUpdate
-#define cudaStreamCaptureModeRelaxed hipStreamCaptureModeRelaxed
-#define cudaStreamBeginCapture hipStreamBeginCapture
-#define cudaGraph_t hipGraph_t
 #define cudaStream_t hipStream_t
 #define cudaSuccess hipSuccess
 #define __trap() do { abort(); __builtin_unreachable(); } while(0)
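This header ports the CUDA backend to ROCm by aliasing CUDA identifiers to their HIP equivalents at the preprocessor level, so the .cu sources compile unchanged; the revert removes the virtual-memory and graph aliases while the stream, BLAS, and other aliases remain. A toy illustration of the technique (not actual lines from the header):

#include <hip/hip_runtime.h>

// Toy example of the aliasing approach used by vendors/hip.h: code written
// against CUDA names compiles directly against the HIP runtime.
#define cudaStream_t          hipStream_t
#define cudaSuccess           hipSuccess
#define cudaStreamSynchronize hipStreamSynchronize

static bool sync_ok(cudaStream_t stream) {
    return cudaStreamSynchronize(stream) == cudaSuccess;
}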
