Skip to content

Commit 80ffabd

Browse files
committed
Enable ROCm/HIP GPU acceleration
Signed-off-by: Gavin Zhao <[email protected]>
1 parent 0b297fb commit 80ffabd

File tree

11 files changed

+7586
-0
lines changed

11 files changed

+7586
-0
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ option(Other_Optimization_For_Other "Enable other optimizatio flag for other if
3939
option(Enable_IPO "Enable link time optimization, may cause unknown bugs even slow performance" OFF)
4040
option(Maximum_Optimization "Enable maximum optimizations for the current platform" OFF)
4141
option(Enable_CUDA "Enable CUDA module" OFF)
42+
option(Enable_HIP "Enable HIP module" OFF)
4243
option(Enable_OpenCL "Enable OpenCL module" ON)
4344
option(Enable_NCNN "Enable ncnn module" OFF)
4445
option(Enable_Fast_Math "Enable fast math" OFF)
@@ -89,6 +90,8 @@ if(Enable_CUDA)
8990
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_Minimum_CC}-virtual ${CUDA_CC}-real)
9091
endif()
9192
enable_language(CUDA)
93+
elseif(Enable_HIP)
94+
enable_language(HIP)
9295
endif()
9396

9497
if(Use_Eigen3)
@@ -308,6 +311,7 @@ message(STATUS
308311
" Enable IPO ${Enable_IPO}\n"
309312
" Enable OpenCL ${Enable_OpenCL}\n"
310313
" Enable CUDA ${Enable_CUDA}\n"
314+
" Enable HIP ${Enable_HIP}\n"
311315
" Enable NCNN ${Enable_NCNN}\n"
312316
" Enable OpenCV DNN ${Enable_OpenCV_DNN}\n"
313317
" Enable video ${Enable_Video}\n"

core/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ if(Build_Static_Core)
88
add_library(${PROJECT_NAME} STATIC
99
${INCLUDE} ${SOURCE}
1010
$<$<BOOL:${Enable_CUDA}>:$<TARGET_OBJECTS:CUDA_Module>>
11+
$<$<BOOL:${Enable_HIP}>:$<TARGET_OBJECTS:HIP_Module>>
1112
)
1213
else()
1314
add_library(${PROJECT_NAME} SHARED
1415
${INCLUDE} ${SOURCE}
1516
$<$<BOOL:${Enable_CUDA}>:$<TARGET_OBJECTS:CUDA_Module>>
17+
$<$<BOOL:${Enable_HIP}>:$<TARGET_OBJECTS:HIP_Module>>
1618
)
1719
endif()
1820

@@ -21,6 +23,9 @@ target_compile_definitions(
2123
PUBLIC
2224
$<$<BOOL:${Enable_OpenCL}>:ENABLE_OPENCL>
2325
$<$<BOOL:${Enable_CUDA}>:ENABLE_CUDA>
26+
$<$<BOOL:${Enable_HIP}>:ENABLE_HIP>
27+
# This allows HIP to emulate as CUDA
28+
$<$<BOOL:${Enable_HIP}>:ENABLE_CUDA>
2429
$<$<BOOL:${Enable_NCNN}>:ENABLE_NCNN>
2530
$<$<BOOL:${Enable_Video}>:ENABLE_VIDEO>
2631
$<$<BOOL:${Enable_Preview_GUI}>:ENABLE_PREVIEW_GUI>

hip/CMakeLists.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
if(Enable_HIP)
2+
project(HIP_Module LANGUAGES CXX HIP)
3+
4+
file(GLOB SOURCES src/*.hip)
5+
file(GLOB INCLUDE include/*.hpp include/*.hip)
6+
7+
source_group("hip_include" FILES ${INCLUDE})
8+
source_group("hip_source" FILES ${SOURCE})
9+
10+
message(STATUS "CMAKE_HIP_COMPILER: ${CMAKE_HIP_COMPILER}")
11+
message(STATUS "CMAKE_HIP_COMPILER_ID: ${CMAKE_HIP_COMPILER_ID}")
12+
message(STATUS "CMAKE_HIP_COMPILER_VERSION: ${CMAKE_HIP_COMPILER_VERSION}")
13+
message(STATUS "HIP SOURCES: ${SOURCES}")
14+
message(STATUS "HIP INCLUDE: ${INCLUDE}")
15+
16+
add_library(${PROJECT_NAME} OBJECT ${INCLUDE} ${SOURCES})
17+
18+
target_include_directories(${PROJECT_NAME} PRIVATE ${TOP_DIR}/core/include include)
19+
20+
target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_CUDA ENABLE_HIP)
21+
22+
set_target_properties(
23+
${PROJECT_NAME} PROPERTIES
24+
HIP_STANDARD 17
25+
)
26+
27+
if((NOT Build_Static_Core) OR (Build_Static_Core AND Build_Static_Core_PIC))
28+
set_target_properties(
29+
${PROJECT_NAME} PROPERTIES
30+
POSITION_INDEPENDENT_CODE True
31+
)
32+
endif()
33+
endif()

hip/include/ACNetCommon.hpp

Lines changed: 316 additions & 0 deletions
Large diffs are not rendered by default.

hip/include/CudaHelper.hip

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#ifndef ANIME4KCPP_CUDA_CUDA_HELPER_CUH
2+
#define ANIME4KCPP_CUDA_CUDA_HELPER_CUH
3+
4+
#include <type_traits>
5+
#include <cstdint>
6+
7+
#include <hip/hip_runtime.h>
8+
#include <hip/hip_fp16.h>
9+
#include <hip/device_functions.h>
10+
#include <hip/amd_detail/amd_surface_functions.h>
11+
12+
#include "ACException.hpp"
13+
14+
#define CheckCudaErr(err) \
15+
if (err != hipSuccess) \
16+
throw Anime4KCPP::ACException<Anime4KCPP::ExceptionType::GPU, true>(hipGetErrorString(err), std::string(__FILE__), __LINE__)
17+
18+
using uchar = std::uint8_t;
19+
using ushort = std::uint16_t;
20+
21+
extern int currCudaDeviceID;
22+
23+
template <typename T>
24+
struct PixelValue;
25+
26+
template <>
27+
struct PixelValue<uchar>
28+
{
29+
__device__ static constexpr uchar max()
30+
{
31+
return 255;
32+
}
33+
34+
__device__ static constexpr uchar min()
35+
{
36+
return 0;
37+
}
38+
};
39+
40+
template <>
41+
struct PixelValue<ushort>
42+
{
43+
__device__ static constexpr ushort max()
44+
{
45+
return 65535;
46+
}
47+
48+
__device__ static constexpr ushort min()
49+
{
50+
return 0;
51+
}
52+
};
53+
54+
template <>
55+
struct PixelValue<float>
56+
{
57+
__device__ static constexpr float max()
58+
{
59+
return 1.0f;
60+
}
61+
62+
__device__ static constexpr float min()
63+
{
64+
return 0.0f;
65+
}
66+
};
67+
68+
template <typename T, int dim>
69+
struct Vec;
70+
71+
template <>
72+
struct Vec<uchar, 2>
73+
{
74+
using type = uchar2;
75+
};
76+
77+
template <>
78+
struct Vec<uchar, 4>
79+
{
80+
using type = uchar4;
81+
};
82+
83+
template <>
84+
struct Vec<ushort, 2>
85+
{
86+
using type = ushort2;
87+
};
88+
89+
template <>
90+
struct Vec<ushort, 4>
91+
{
92+
using type = ushort4;
93+
};
94+
95+
template <>
96+
struct Vec<float, 2>
97+
{
98+
using type = float2;
99+
};
100+
101+
template <>
102+
struct Vec<float, 4>
103+
{
104+
using type = float4;
105+
};
106+
107+
template <typename T>
108+
using Vec2 = Vec<T, 2>;
109+
110+
template <typename T>
111+
using Vec4 = Vec<T, 4>;
112+
113+
template <typename T>
114+
inline __device__ typename Vec4<T>::type makeVec4(T x, T y, T z, T w);
115+
116+
template <>
117+
inline __device__ typename Vec4<uchar>::type makeVec4(uchar x, uchar y, uchar z, uchar w)
118+
{
119+
return make_uchar4(x, y, z, w);
120+
}
121+
122+
template <>
123+
inline __device__ typename Vec4<ushort>::type makeVec4(ushort x, ushort y, ushort z, ushort w)
124+
{
125+
return make_ushort4(x, y, z, w);
126+
}
127+
128+
template <>
129+
inline __device__ typename Vec4<float>::type makeVec4(float x, float y, float z, float w)
130+
{
131+
return make_float4(x, y, z, w);
132+
}
133+
134+
inline __device__ float clamp(float f, float a, float b)
135+
{
136+
return fmaxf(a, fminf(f, b));
137+
}
138+
139+
#endif // !ANIME4KCPP_CUDA_CUDA_HELPER_CUH

0 commit comments

Comments
 (0)