|
# User-facing switch for this directory: a cached boolean that defaults to
# TRUE. When it evaluates to false, processing of this file stops before any
# DeviceRTL target is defined.
set(LIBOMPTARGET_BUILD_DEVICERTL_BCLIB
    TRUE
    CACHE BOOL "Can be set to false to disable building this library.")

if(NOT LIBOMPTARGET_BUILD_DEVICERTL_BCLIB)
  message(STATUS
          "Not building DeviceRTL: Disabled by LIBOMPTARGET_BUILD_DEVICERTL_BCLIB")
  return()
endif()
| 8 | + |
# Check to ensure the host system is a supported host architecture.
# The variable is referenced by name instead of being expanded with ${...}:
# if CMAKE_SIZEOF_VOID_P is empty or undefined, an expanded form would
# collapse to the malformed expression `if(NOT EQUAL "8")` and abort the
# configure step. By name, such a host simply fails the comparison and is
# skipped like any other unsupported host.
if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
  message(STATUS "Not building DeviceRTL: Runtime does not support 32-bit hosts")
  return()
endif()
| 14 | + |
# Locate the clang executable that compiles the device runtime sources and
# store it in CLANG_TOOL. If no usable clang is available, stop processing
# this file without defining any DeviceRTL target.
if(LLVM_DIR)
  # Builds that use pre-installed LLVM have LLVM_DIR set.
  # A standalone or LLVM_ENABLE_RUNTIMES=openmp build takes this route.
  find_program(CLANG_TOOL clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
  if(NOT CLANG_TOOL)
    # find_program() leaves CLANG_TOOL set to CLANG_TOOL-NOTFOUND when the
    # search fails. Bail out here so that value is never used as the compiler
    # in the custom commands further down.
    message(STATUS "Not building DeviceRTL. No appropriate clang found")
    return()
  endif()
elseif(LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDALONE_BUILD)
  # LLVM in-tree builds may use CMake target names to discover the tools.
  # A LLVM_ENABLE_PROJECTS=openmp build takes this route.
  set(CLANG_TOOL $<TARGET_FILE:clang>)
else()
  message(STATUS "Not building DeviceRTL. No appropriate clang found")
  return()
endif()
| 27 | + |
# Layout of the DeviceRTL tree, rooted at the directory that holds this file.
set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)

# Optional Emissary API files. Both variables are always defined (empty when
# the feature is off) so that values of the same name from an enclosing scope
# cannot leak into the header and source lists. The sources are spelled
# relative to ${source_directory}, like every other DeviceRTL source.
set(emissary_includes "")
set(emissary_sources "")
if(OFFLOAD_ENABLE_EMISSARY_APIS)
  set(emissary_includes ${include_directory}/EmissaryIds.h)
  set(emissary_sources
      ${source_directory}/EmissaryFortrt.cpp
      ${source_directory}/EmissaryPrint.cpp)
endif()
| 36 | + |
# Headers of the device runtime. The list is written as bare file names and
# then prefixed with the include directory in one step; the optional emissary
# headers (possibly none) are appended last.
set(include_files
    Allocator.h
    Configuration.h
    Platform.h
    Debug.h
    Interface.h
    LibC.h
    Mapping.h
    Profiling.h
    State.h
    Synchronization.h
    DeviceTypes.h
    DeviceUtils.h
    Xteamr.h
    Xteams.h
    Workshare.h)
list(TRANSFORM include_files PREPEND "${include_directory}/")
list(APPEND include_files ${emissary_includes})
| 55 | + |
# Translation units of the device runtime. The list is written as bare file
# names and then prefixed with the source directory in one step; the optional
# emissary sources (possibly none) are appended last.
set(src_files
    Allocator.cpp
    Configuration.cpp
    Debug.cpp
    Kernel.cpp
    LibC.cpp
    LibM.cpp
    Mapping.cpp
    Misc.cpp
    Parallelism.cpp
    Profiling.cpp
    Reduction.cpp
    State.cpp
    Synchronization.cpp
    Tasking.cpp
    DeviceUtils.cpp
    Workshare.cpp
    ExtraMapping.cpp
    Xteamr.cpp
    Memory.cpp
    Xteams.cpp)
list(TRANSFORM src_files PREPEND "${source_directory}/")
list(APPEND src_files ${emissary_sources})
| 79 | + |
# We disable the SLP vectorizer during the runtime optimization to avoid
# vectorized accesses to the shared state. Generally, those are "good", but
# the optimizer pipeline (esp. the Attributor) does not fully support
# vectorized instructions yet, and we would end up missing out on far more
# important constant propagation. That said, we will run the vectorizer again
# after the runtime has been linked into the user program.
set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 -mllvm -vectorize-slp=false)

# If the user built with the GPU C library enabled we will use that instead.
# The variable is tested by name: expanding it with ${...} would make if()
# treat its *value* as a variable name or constant, and dereference it a
# second time.
if(LIBOMPTARGET_GPU_LIBC_SUPPORT)
  list(APPEND clang_opt_flags -DOMPTARGET_HAS_LIBC)
endif()
| 92 | + |
# Set flags for LLVM Bitcode compilation. These are shared by every device
# target; per-target flags are added by compileDeviceRTLLibrary().
set(bc_flags -c -flto -std=c++17 -fvisibility=hidden
    ${clang_opt_flags} -nogpulib -nostdlibinc
    -fno-rtti -fno-exceptions -fconvergent-functions
    -Wno-unknown-cuda-version
    -DOMPTARGET_DEVICE_RUNTIME
    -I${include_directory}
    -I${devicertl_base_directory}/../include
    -I${devicertl_base_directory}/../../libc
)

# Select the value of the OMPTARGET_DEBUG preprocessor definition: -1 when
# LIBOMPTARGET_DEVICE_DEBUG is enabled, 0 otherwise. The variable is tested
# by name so that its value is not dereferenced a second time by if().
if(LIBOMPTARGET_DEVICE_DEBUG)
  list(APPEND bc_flags -DOMPTARGET_DEBUG=-1)
else()
  list(APPEND bc_flags -DOMPTARGET_DEBUG=0)
endif()
| 109 | + |
# First create an imported object-library target. No objects are attached to
# it here.
# NOTE(review): nothing in the visible part of this file references this
# target afterwards - confirm whether it is still needed.
add_library(omptarget.devicertl.all_objs OBJECT IMPORTED)
# compileDeviceRTLLibrary(<target_name> <target_triple> [<extra clang flag>...])
#
# Defines all build and install rules of the device runtime for one target.
#
#   target_name    Short name used in the generated target and file names
#                  (e.g. "amdgpu", "nvptx").
#   target_triple  Triple handed to clang via --target=; also used as the
#                  sub-directory name for output and installation.
#   ARGN           Additional flags appended to the per-source clang command.
#
# Reads from the calling scope: src_files, bc_flags, CLANG_TOOL,
# include_directory, devicertl_base_directory,
# LIBOMPTARGET_LLVM_LIBRARY_INTDIR and LLVM_LIBDIR_SUFFIX.
#
# Creates:
#   * one custom command per entry of src_files producing
#     <file>-<target_name>.o,
#   * the executable target libomptarget-<target_name>, whose output file is
#     libomptarget-<target_name>.bc,
#   * the imported object library omptarget.<target_name>.all_objs wrapping
#     that .bc file,
#   * the static library omptarget.<target_name> (archive name "ompdevice"),
#   * when CMAKE_EXPORT_COMPILE_COMMANDS is set, the helper target
#     omptarget-ide-<target_name>, which is excluded from "all".
function(compileDeviceRTLLibrary target_name target_triple)
  set(target_bc_flags ${ARGN})

  # Start from an empty list so that a variable called obj_files in the
  # calling scope can never contribute stale entries to the link below.
  set(obj_files "")

  # Quoted so that the name is compared as a string and is not dereferenced
  # again should a variable with the same name as the target exist.
  if("${target_name}" MATCHES "amdgpu")
    find_package(AMDDeviceLibs REQUIRED CONFIG
      HINTS ${CMAKE_BINARY_DIR}/../../tools/rocm-device-libs
            ${CMAKE_BINARY_DIR}/../rocm-device-libs-prefix/src/rocm-device-libs-build
            ${CMAKE_INSTALL_PREFIX}
    )
    get_target_property(_ocml_bc ocml IMPORTED_LOCATION)
    get_target_property(_ockl_bc ockl IMPORTED_LOCATION)
    if(NOT _ockl_bc)
      message(FATAL_ERROR "Could not find ockl.bc")
    endif()
    if(NOT _ocml_bc)
      message(FATAL_ERROR "Could not find ocml.bc")
    endif()
    # Have clang link the ockl and ocml bitcode libraries into every
    # translation unit as builtin bitcode.
    list(APPEND target_bc_flags -Xclang -mlink-builtin-bitcode -Xclang ${_ockl_bc})
    list(APPEND target_bc_flags -Xclang -mlink-builtin-bitcode -Xclang ${_ocml_bc})
  endif()

  foreach(src ${src_files})
    get_filename_component(infile ${src} ABSOLUTE)
    get_filename_component(outfile ${src} NAME)
    set(outfile "${outfile}-${target_name}.o")
    set(depfile "${outfile}.d")

    # Compile one source with the shared bitcode flags plus the per-target
    # flags. -MD/-MF make clang write the dependency file that DEPFILE
    # hands to the generator for header tracking.
    add_custom_command(OUTPUT ${outfile}
      COMMAND ${CLANG_TOOL}
      ${bc_flags}
      --target=${target_triple}
      ${target_bc_flags}
      -MD -MF ${depfile}
      ${infile} -o ${outfile}
      DEPENDS ${infile}
      DEPFILE ${depfile}
      COMMENT "Building LLVM bitcode ${outfile}"
      VERBATIM
    )
    if(TARGET clang)
      # Add a file-level dependency to ensure that clang is up-to-date.
      # By default, add_custom_command only builds clang if the
      # executable is missing.
      add_custom_command(OUTPUT ${outfile}
        DEPENDS clang
        APPEND
      )
    endif()
    set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile})

    list(APPEND obj_files ${CMAKE_CURRENT_BINARY_DIR}/${outfile})
  endforeach()

  # Trick to combine these into a bitcode file via the linker's LTO pass. This
  # is used to provide the legacy `libomptarget-<name>.bc` files. Hack this
  # through as an executable to get it to use the relocatable link.
  add_executable(libomptarget-${target_name})
  target_sources(libomptarget-${target_name} PRIVATE ${obj_files})
  set_target_properties(libomptarget-${target_name} PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY ${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}
    LINKER_LANGUAGE CXX
    BUILD_RPATH ""
    INSTALL_RPATH ""
    RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc)
  # Passing an empty CPU to -march= (and -mcpu=) suppresses target specific
  # metadata.
  target_compile_options(libomptarget-${target_name} PRIVATE
    "--target=${target_triple}" "-fuse-ld=lld" "-march=" "-mcpu="
    "-Wno-unused-command-line-argument")
  target_link_options(libomptarget-${target_name} PRIVATE
    "--target=${target_triple}" "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm"
    "-Wl,--lto-newpm-passes=default<O3>" "-Wl,-plugin-opt=-openmp-opt-disable"
    "-Wl,-plugin-opt=-attributor-enable=module"
    "-Wl,-plugin-opt=-vectorize-slp=false" "-fuse-ld=lld" "-march=" "-mcpu=")
  install(TARGETS libomptarget-${target_name}
    PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ
    DESTINATION "lib${LLVM_LIBDIR_SUFFIX}/${target_triple}")

  # Expose the combined .bc file as an imported object library and make it
  # depend on the target that produces the file.
  add_library(omptarget.${target_name}.all_objs OBJECT IMPORTED)
  set_property(TARGET omptarget.${target_name}.all_objs APPEND PROPERTY IMPORTED_OBJECTS
    ${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}/libomptarget-${target_name}.bc)
  add_dependencies(omptarget.${target_name}.all_objs libomptarget-${target_name})

  # Archive all the object files generated above into a static library
  add_library(omptarget.${target_name} STATIC)
  set_target_properties(omptarget.${target_name} PROPERTIES
    ARCHIVE_OUTPUT_DIRECTORY "${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}/${target_triple}"
    ARCHIVE_OUTPUT_NAME ompdevice
    LINKER_LANGUAGE CXX
  )
  add_dependencies(omptarget.${target_name} libomptarget-${target_name})
  target_link_libraries(omptarget.${target_name} PRIVATE omptarget.${target_name}.all_objs)
  target_link_options(omptarget.${target_name} PRIVATE "--target=${target_triple}"
    "-Wno-unused-command-line-argument" "-r" "-nostdlib" "-flto"
    "-Wl,--lto-emit-llvm" "-fuse-ld=lld" "-march=" "-mcpu=")

  install(TARGETS omptarget.${target_name}
    ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}/${target_triple}")

  if(CMAKE_EXPORT_COMPILE_COMMANDS)
    # Helper target that lists the sources directly and is excluded from
    # "all"; it is only defined when a compile-commands export is requested.
    set(ide_target_name omptarget-ide-${target_name})
    add_library(${ide_target_name} STATIC EXCLUDE_FROM_ALL ${src_files})
    target_compile_options(${ide_target_name} PRIVATE
      -fvisibility=hidden --target=${target_triple}
      -nogpulib -nostdlibinc -Wno-unknown-cuda-version
    )
    target_compile_definitions(${ide_target_name} PRIVATE SHARED_SCRATCHPAD_SIZE=512)
    target_include_directories(${ide_target_name} PRIVATE
      ${include_directory}
      ${devicertl_base_directory}/../../libc
      ${devicertl_base_directory}/../include
    )
    install(TARGETS ${ide_target_name} EXCLUDE_FROM_ALL)
  endif()
endfunction()
| 225 | + |
# Instantiate the device runtime once per GPU backend. A backend is built
# when it appears in LLVM_TARGETS_TO_BUILD, or when that list is empty or
# unset.

# AMDGPU
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD OR NOT LLVM_TARGETS_TO_BUILD)
  compileDeviceRTLLibrary(
    amdgpu amdgcn-amd-amdhsa
    -Xclang -mcode-object-version=none)
endif()

# NVPTX
if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD OR NOT LLVM_TARGETS_TO_BUILD)
  compileDeviceRTLLibrary(
    nvptx nvptx64-nvidia-cuda
    --cuda-feature=+ptx63)
endif()
0 commit comments