Skip to content

Commit 29c8fbe

Browse files
authored
HIP: bump requirement to rocm 6.1 (#15296)
1 parent 1adc981 commit 29c8fbe

File tree

5 files changed

+8
-62
lines changed

5 files changed

+8
-62
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ jobs:
443443
444444
ubuntu-22-cmake-hip:
445445
runs-on: ubuntu-22.04
446-
container: rocm/dev-ubuntu-22.04:6.0.2
446+
container: rocm/dev-ubuntu-22.04:6.1.2
447447

448448
steps:
449449
- name: Clone
@@ -471,16 +471,6 @@ jobs:
471471
-DGGML_HIP=ON
472472
cmake --build build --config Release -j $(nproc)
473473
474-
- name: Build with legacy HIP support
475-
id: cmake_build_legacy_hip
476-
run: |
477-
cmake -B build2 -S . \
478-
-DCMAKE_C_COMPILER=hipcc \
479-
-DCMAKE_CXX_COMPILER=hipcc \
480-
-DGGML_HIP_ROCWMMA_FATTN=ON \
481-
-DGGML_HIP=ON
482-
cmake --build build2 --config Release -j $(nproc)
483-
484474
ubuntu-22-cmake-musa:
485475
runs-on: ubuntu-22.04
486476
container: mthreads/musa:rc4.2.0-devel-ubuntu22.04-amd64

ggml/src/ggml-cuda/common.cuh

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -464,25 +464,21 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b
464464
}
465465

466466
static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) {
467-
#if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000
467+
#if defined(GGML_USE_HIP)
468468
return half2(__hmax(a.x, b.x), __hmax(a.y, b.y));
469-
#elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX
469+
#elif CUDART_VERSION >= CUDART_HMAX
470470
return __hmax2(a, b);
471-
#elif !defined(GGML_USE_HIP)
471+
#else
472472
half2 ret;
473473
reinterpret_cast<half&>(ret.x) = __float2half(fmaxf( __low2float(a), __low2float(b)));
474474
reinterpret_cast<half&>(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b)));
475475
return ret;
476-
#else
477-
GGML_UNUSED(a);
478-
GGML_UNUSED(b);
479-
NO_DEVICE_CODE;
480476
#endif
481477
}
482478

483479
template<int width = WARP_SIZE>
484480
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
485-
#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
481+
#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
486482
#pragma unroll
487483
for (int offset = width/2; offset > 0; offset >>= 1) {
488484
x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width));
@@ -491,7 +487,7 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
491487
#else
492488
GGML_UNUSED(x);
493489
NO_DEVICE_CODE;
494-
#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
490+
#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
495491
}
496492

497493
#if CUDART_VERSION < CUDART_HMASK

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -180,30 +180,6 @@ static int ggml_cuda_parse_id(char devName[]) {
180180
#endif // defined(GGML_USE_HIP)
181181

182182
static ggml_cuda_device_info ggml_cuda_init() {
183-
#if defined(GGML_USE_HIP)
184-
// Workaround for a rocBLAS bug when using multiple graphics cards:
185-
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
186-
{
187-
int major_version = 0;
188-
size_t version_length = 0;
189-
if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
190-
std::vector<char> version(version_length+1, '\0');
191-
if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
192-
version.resize(::strlen(version.data()));
193-
int parsed_value = 0;
194-
if (std::from_chars(version.data(), version.data() + version.size(), parsed_value).ec == std::errc()) {
195-
major_version = parsed_value;
196-
}
197-
}
198-
}
199-
if (major_version < 4) {
200-
GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
201-
rocblas_initialize();
202-
CUDA_CHECK(cudaDeviceSynchronize());
203-
}
204-
}
205-
#endif
206-
207183
ggml_cuda_device_info info = {};
208184

209185
cudaError_t err = cudaGetDeviceCount(&info.device_count);

ggml/src/ggml-cuda/vendors/hip.h

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
#include <hipblas/hipblas.h>
66
#include <hip/hip_fp16.h>
77
#include <hip/hip_bfloat16.h>
8-
// for rocblas_initialize()
9-
#include "rocblas/rocblas.h"
108

119
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
1210
#define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT
@@ -251,17 +249,3 @@ static __device__ __forceinline__ unsigned int __vcmpne4(unsigned int a, unsigne
251249
}
252250
return c;
253251
}
254-
255-
#if HIP_VERSION < 50600000
256-
// __shfl_xor() for half2 was added in ROCm 5.6
257-
static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int width) {
258-
typedef union half2_b32 {
259-
half2 val;
260-
int b32;
261-
} half2_b32_t;
262-
half2_b32_t tmp;
263-
tmp.val = var;
264-
tmp.b32 = __shfl_xor(tmp.b32, laneMask, width);
265-
return tmp.val;
266-
}
267-
#endif // HIP_VERSION < 50600000

ggml/src/ggml-hip/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ if (GGML_HIP_ROCWMMA_FATTN)
4646
endif()
4747
endif()
4848

49-
if (${hip_VERSION} VERSION_LESS 5.5)
50-
message(FATAL_ERROR "At least ROCM/HIP V5.5 is required")
49+
if (${hip_VERSION} VERSION_LESS 6.1)
50+
message(FATAL_ERROR "At least ROCM/HIP V6.1 is required")
5151
endif()
5252

5353
message(STATUS "HIP and hipBLAS found")

0 commit comments

Comments
 (0)