Skip to content

Commit 3c48b7e

Browse files
authored
Add support for compilation to ROCM (#344)
1 parent 6d10d94 commit 3c48b7e

24 files changed

+2219
-586
lines changed

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ cmake_minimum_required(VERSION 3.10)
22

33
include(CheckCXXSourceCompiles)
44

5-
set(POLYGEIST_ENABLE_CUDA 0 CACHE BOOL "Enable CUDA compilation support")
5+
set(POLYGEIST_ENABLE_CUDA 0 CACHE BOOL "Enable CUDA frontend and backend")
6+
set(POLYGEIST_ENABLE_ROCM 0 CACHE BOOL "Enable ROCM backend")
67

78
if(POLICY CMP0068)
89
cmake_policy(SET CMP0068 NEW)

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ ninja
3030
ninja check-mlir
3131
```
3232

33-
To enable compilation to cuda add `-DMLIR_ENABLE_CUDA_RUNNER=1` and remove `-DLLVM_TARGETS_TO_BUILD="host"` from the cmake arguments. (You may need to specify `CUDACXX`, `CUDA_PATH`, or `CMAKE_CUDA_COMPILER`)
33+
To enable compilation to cuda add `-DMLIR_ENABLE_CUDA_RUNNER=1` and remove `-DLLVM_TARGETS_TO_BUILD="host"` from the cmake arguments. (You may need to specify `CUDACXX`, `CUDA_PATH`, and/or `-DCMAKE_CUDA_COMPILER`)
34+
35+
To enable the ROCM backend add `-DMLIR_ENABLE_ROCM_RUNNER=1` and remove `-DLLVM_TARGETS_TO_BUILD="host"` from the cmake arguments. (You may need to specify `-DHIP_CLANG_INCLUDE_PATH` and/or `ROCM_PATH`)
36+
37+
For faster compilation we recommend using `-DLLVM_USE_LINKER=lld`.
3438

3539
2. Build Polygeist:
3640
```sh
@@ -48,6 +52,10 @@ ninja check-polygeist-opt && ninja check-cgeist
4852

4953
To enable compilation to cuda add `-DPOLYGEIST_ENABLE_CUDA=1`
5054

55+
To enable the ROCM backend add `-DPOLYGEIST_ENABLE_ROCM=1`
56+
57+
For faster compilation we recommend using `-DPOLYGEIST_USE_LINKER=lld`.
58+
5159
#### Option 2: Using unified LLVM, MLIR, Clang, and Polygeist build
5260

5361
Polygeist can also be built as an external LLVM project using [LLVM_EXTERNAL_PROJECTS](https://llvm.org/docs/CMake.html#llvm-related-variables).

include/polygeist/Passes/Passes.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,21 +34,29 @@ std::unique_ptr<Pass> createRemoveTrivialUsePass();
3434
std::unique_ptr<Pass> createParallelLowerPass(
3535
bool wrapParallelOps = false,
3636
PolygeistGPUStructureMode gpuKernelStructureMode = PGSM_Discard);
37-
std::unique_ptr<Pass> createCudaRTLowerPass();
37+
std::unique_ptr<Pass> createConvertCudaRTtoCPUPass();
38+
std::unique_ptr<Pass> createConvertCudaRTtoGPUPass();
39+
std::unique_ptr<Pass> createConvertCudaRTtoHipRTPass();
3840
std::unique_ptr<Pass>
3941
createConvertPolygeistToLLVMPass(const LowerToLLVMOptions &options,
40-
bool useCStyleMemRef, bool onlyGpuModules);
42+
bool useCStyleMemRef, bool onlyGpuModules,
43+
std::string gpuTarget);
4144
std::unique_ptr<Pass> createConvertPolygeistToLLVMPass();
4245
std::unique_ptr<Pass> createForBreakToWhilePass();
4346
std::unique_ptr<Pass>
4447
createConvertParallelToGPUPass1(bool useOriginalThreadNums = false);
4548
std::unique_ptr<Pass>
4649
createConvertParallelToGPUPass2(bool emitGPUKernelLaunchBounds = true);
4750
std::unique_ptr<Pass> createGpuSerializeToCubinPass(
48-
StringRef triple, StringRef arch, StringRef features, int llvmOptLevel,
49-
int ptxasOptLevel, std::string ptxasPath, std::string libDevicePath,
50-
bool outputIntermediate);
51+
StringRef arch, StringRef features, int llvmOptLevel, int ptxasOptLevel,
52+
std::string ptxasPath, std::string libDevicePath, bool outputIntermediate);
53+
std::unique_ptr<Pass>
54+
createGpuSerializeToHsacoPass(StringRef arch, StringRef features,
55+
int llvmOptLevel, int hsaOptLevel,
56+
std::string rocmPath, bool outputIntermediate);
57+
5158
void registerGpuSerializeToCubinPass();
59+
void registerGpuSerializeToHsacoPass();
5260

5361
void populateForBreakToWhilePatterns(RewritePatternSet &patterns);
5462
} // namespace polygeist

include/polygeist/Passes/Passes.td

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,25 @@ def Mem2Reg : Pass<"mem2reg"> {
1313
let constructor = "mlir::polygeist::createMem2RegPass()";
1414
}
1515

16-
def CudaRTLower : Pass<"cudart-lower", "mlir::ModuleOp"> {
16+
def ConvertCudaRTtoCPU : Pass<"convert-cudart-to-cpu", "mlir::ModuleOp"> {
1717
let summary = "Lower cudart functions to cpu versions";
1818
let dependentDialects =
1919
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect"];
20-
let constructor = "mlir::polygeist::createCudaRTLowerPass()";
20+
let constructor = "mlir::polygeist::createConvertCudaRTtoCPUPass()";
21+
}
22+
23+
def ConvertCudaRTtoGPU : Pass<"convert-cudart-to-gpu", "mlir::ModuleOp"> {
24+
let summary = "Lower cudart functions to generic gpu versions";
25+
let dependentDialects =
26+
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect", "gpu::GPUDialect"];
27+
let constructor = "mlir::polygeist::createConvertCudaRTtoGPUPass()";
28+
}
29+
30+
def ConvertCudaRTtoHipRT : Pass<"convert-cudart-to-hiprt", "mlir::ModuleOp"> {
31+
let summary = "Lower cudart functions to hiprt versions";
32+
let dependentDialects =
33+
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect", "gpu::GPUDialect"];
34+
let constructor = "mlir::polygeist::createConvertCudaRTtoHipRTPass()";
2135
}
2236

2337
def ParallelLower : Pass<"parallel-lower", "mlir::ModuleOp"> {

include/polygeist/PolygeistOps.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def GPUThreadOp : Polygeist_Op<"gpu_thread", [
100100
"Value":$threadIndexX, "Value":$threadIndexY, "Value":$threadIndexZ)>];
101101
}
102102

103-
def GPUAlternativesOp : Polygeist_Op<"gpu_alternatives", [
103+
def AlternativesOp : Polygeist_Op<"alternatives", [
104104
RecursiveMemoryEffects]> {
105105
let summary = "Provides several alternatives kernels for gpu code";
106106
let regions = (region VariadicRegion<SizedRegion<1>>:$regions);
@@ -141,7 +141,7 @@ def GPUErrorOp : Polygeist_Op<"gpu_error", [
141141
}
142142

143143
def PolygeistYieldOp : Polygeist_Op<"polygeist_yield", [Pure, ReturnLike, Terminator,
144-
ParentOneOf<["GPUAlternativesOp", "GPUWrapperOp", "GPUErrorOp", "GPUBlockOp", "GPUThreadOp"]>]> {
144+
ParentOneOf<["AlternativesOp", "GPUWrapperOp", "GPUErrorOp", "GPUBlockOp", "GPUThreadOp"]>]> {
145145
let summary = "Polygeist ops terminator";
146146
}
147147

lib/polygeist/ExecutionEngine/CMakeLists.txt

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,9 @@ if(POLYGEIST_ENABLE_CUDA)
44
find_package(CUDA)
55
enable_language(CUDA)
66

7-
# Shared lib wrapper
8-
97
find_library(CUDA_RUNTIME_LIBRARY cuda)
108

11-
# add_mlir_library(polygeist_cuda_runtime
12-
# SHARED
13-
# CudaRuntimeWrappers.cpp
14-
15-
# EXCLUDE_FROM_LIBMLIR
16-
# )
17-
18-
# set_property(TARGET polygeist_cuda_runtime PROPERTY CXX_STANDARD 14)
19-
# target_include_directories(polygeist_cuda_runtime
20-
# PRIVATE
21-
# ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
22-
# )
23-
# target_link_libraries(polygeist_cuda_runtime
24-
# PRIVATE
25-
# ${CUDA_RUNTIME_LIBRARY}
26-
# )
27-
289
# Bitcode lib wrapper
29-
set(POLYGEIST_EXECUTION_ENGINE_BITCODE_DIR ${CMAKE_CURRENT_BINARY_DIR})
30-
3110
find_program(CLANG_TOOL clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
3211

3312
set(source_directory ${CMAKE_CURRENT_SOURCE_DIR})
@@ -74,3 +53,60 @@ if(POLYGEIST_ENABLE_CUDA)
7453

7554

7655
endif()
56+
if(POLYGEIST_ENABLE_ROCM)
57+
if (NOT DEFINED ROCM_PATH)
58+
if (NOT DEFINED ENV{ROCM_PATH})
59+
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
60+
else()
61+
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
62+
endif()
63+
endif()
64+
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} "${ROCM_PATH}/hip")
65+
find_package(hip REQUIRED)
66+
67+
# Bitcode lib wrapper
68+
find_program(CLANG_TOOL clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
69+
70+
set(source_directory ${CMAKE_CURRENT_SOURCE_DIR})
71+
set(src_files
72+
${source_directory}/RocmRuntimeWrappers.cpp
73+
)
74+
75+
set(bc_flags -c -emit-llvm -std=c++17 -fvisibility=hidden
76+
-O3
77+
#-nocudalib
78+
-D__HIP_PLATFORM_AMD__
79+
-Xclang -no-opaque-pointers
80+
-I${ROCM_PATH}/include
81+
-DPOLYGEIST_PGO_DEFAULT_DATA_DIR="${POLYGEIST_PGO_DEFAULT_DATA_DIR}"
82+
-DPOLYGEIST_PGO_ALTERNATIVE_ENV_VAR="${POLYGEIST_PGO_ALTERNATIVE_ENV_VAR}"
83+
-DPOLYGEIST_PGO_DATA_DIR_ENV_VAR="${POLYGEIST_PGO_DATA_DIR_ENV_VAR}"
84+
)
85+
86+
foreach(src ${src_files})
87+
get_filename_component(infile ${src} ABSOLUTE)
88+
get_filename_component(filename ${src} NAME)
89+
set(inc_outfile "${filename}.bin.h")
90+
set(bc_outfile "${filename}.bc")
91+
92+
add_custom_command(OUTPUT ${bc_outfile}
93+
COMMAND ${CLANG_TOOL}
94+
${bc_flags}
95+
${infile} -o ${bc_outfile}
96+
DEPENDS ${infile}
97+
COMMENT "Building LLVM bitcode ${bc_outfile}"
98+
VERBATIM
99+
)
100+
add_custom_target(${bc_outfile}_target DEPENDS ${bc_outfile})
101+
add_custom_command(OUTPUT ${inc_outfile}
102+
COMMAND ${XXD_BIN} -i ${bc_outfile} ${inc_outfile}
103+
DEPENDS ${bc_outfile}
104+
COMMENT "Generating C header ${inc_outfile}"
105+
VERBATIM
106+
)
107+
add_custom_target(execution_engine_rocm_wrapper_binary_include DEPENDS ${inc_outfile})
108+
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bc_outfile})
109+
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${inc_outfile})
110+
111+
endforeach()
112+
endif()

0 commit comments

Comments
 (0)