Skip to content

Commit 3aac2c3

Browse files
Two-stage IPO for NVHPC (#581)
Co-authored-by: Cameron <[email protected]>
1 parent 7bdf4e3 commit 3aac2c3

File tree

11 files changed

+195
-185
lines changed

11 files changed

+195
-185
lines changed

.github/workflows/bench.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,8 @@ jobs:
5252
5353
- name: Generate & Post Comment
5454
run: |
55-
. ./mfc.sh load -c p -m g
56-
./mfc.sh bench_diff master/bench-${{ matrix.device }}.yaml pr/bench-${{ matrix.device }}.yaml
55+
(cd pr && . ./mfc.sh load -c p -m g)
56+
(cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)
5757
5858
- name: Archive Logs
5959
uses: actions/upload-artifact@v3

CMakeLists.txt

Lines changed: 130 additions & 109 deletions
Original file line number | Diff line number | Diff line change
@@ -172,16 +172,13 @@ elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_
172172
add_compile_options(
173173
$<$<COMPILE_LANGUAGE:Fortran>:-Mfreeform>
174174
$<$<COMPILE_LANGUAGE:Fortran>:-cpp>
175-
-Minfo=accel
175+
$<$<COMPILE_LANGUAGE:Fortran>:-Minfo=inline>
176+
$<$<COMPILE_LANGUAGE:Fortran>:-Minfo=accel>
176177
)
177178

178-
if (CMAKE_BUILD_TYPE STREQUAL "Release")
179-
add_compile_options(
180-
$<$<COMPILE_LANGUAGE:Fortran:-minline>
181-
)
182-
elseif (CMAKE_BUILD_TYPE STREQUAL "Debug")
179+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
183180
add_compile_options(
184-
$<$<COMPILE_LANGUAGE:Fortran:-O0>
181+
$<$<COMPILE_LANGUAGE:Fortran>:-O0>
185182
)
186183
endif()
187184

@@ -208,13 +205,22 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
208205
endif()
209206

210207
# Enable LTO/IPO if supported
211-
CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)
212-
if (SUPPORTS_IPO)
213-
message(STATUS "Enabled IPO / LTO")
214-
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
208+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
209+
if (MFC_Unified)
210+
message(STATUS "IPO is not available with NVHPC using Unified Memory")
211+
else()
212+
message(STATUS "Performing IPO using -Mextract followed by -Minline")
213+
set(NVHPC_USE_TWO_PASS_IPO TRUE)
214+
endif()
215215
else()
216-
message(STATUS "IPO / LTO is NOT available")
217-
endif()
216+
CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)
217+
if (SUPPORTS_IPO)
218+
message(STATUS "Enabled IPO / LTO")
219+
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
220+
else()
221+
message(STATUS "IPO / LTO is NOT available")
222+
endif()
223+
endif()
218224
endif()
219225

220226
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -365,124 +371,139 @@ function(MFC_SETUP_TARGET)
365371
cmake_parse_arguments(ARGS "OpenACC;MPI;SILO;HDF5;FFTW" "TARGET" "SOURCES" ${ARGN})
366372

367373
add_executable(${ARGS_TARGET} ${ARGS_SOURCES})
368-
369-
set_target_properties(${ARGS_TARGET} PROPERTIES Fortran_PREPROCESS ON)
370-
371-
target_include_directories(${ARGS_TARGET} PRIVATE
372-
"${CMAKE_SOURCE_DIR}/src/common"
373-
"${CMAKE_SOURCE_DIR}/src/common/include"
374-
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}")
375-
376-
if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
377-
target_include_directories(${ARGS_TARGET} PRIVATE
378-
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
374+
set(IPO_TARGETS ${ARGS_TARGET})
375+
# Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler.
376+
# A little hacky, but it *is* an edge-case for *one* compiler.
377+
if (NVHPC_USE_TWO_PASS_IPO)
378+
add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES})
379+
target_compile_options(${ARGS_TARGET}_lib PRIVATE
380+
$<$<COMPILE_LANGUAGE:Fortran>:-Mextract=lib:${ARGS_TARGET}_lib>
381+
$<$<COMPILE_LANGUAGE:Fortran>:-Minline>
382+
)
383+
add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib)
384+
target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib)
385+
list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib)
379386
endif()
380387

381-
string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER)
382-
target_compile_definitions(
383-
${ARGS_TARGET} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID}
384-
MFC_${${ARGS_TARGET}_UPPER}
385-
)
388+
foreach (a_target ${IPO_TARGETS})
389+
set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON)
386390

387-
if (MFC_MPI AND ARGS_MPI)
388-
find_package(MPI COMPONENTS Fortran REQUIRED)
391+
target_include_directories(${a_target} PRIVATE
392+
"${CMAKE_SOURCE_DIR}/src/common"
393+
"${CMAKE_SOURCE_DIR}/src/common/include"
394+
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}")
389395

390-
target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_MPI)
391-
target_link_libraries (${ARGS_TARGET} PRIVATE MPI::MPI_Fortran)
392-
endif()
396+
if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
397+
target_include_directories(${a_target} PRIVATE
398+
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
399+
endif()
393400

394-
if (ARGS_SILO)
395-
find_package(SILO REQUIRED)
396-
target_link_libraries(${ARGS_TARGET} PRIVATE SILO::SILO)
397-
endif()
401+
string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER)
402+
target_compile_definitions(
403+
${a_target} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID}
404+
MFC_${${ARGS_TARGET}_UPPER}
405+
)
398406

399-
if (ARGS_HDF5)
400-
find_package(HDF5 REQUIRED)
401-
target_link_libraries(${ARGS_TARGET} PRIVATE HDF5::HDF5)
402-
endif()
407+
if (MFC_MPI AND ARGS_MPI)
408+
find_package(MPI COMPONENTS Fortran REQUIRED)
403409

404-
if (ARGS_FFTW)
405-
if (MFC_OpenACC AND ARGS_OpenACC)
406-
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
407-
find_package(CUDAToolkit REQUIRED)
408-
target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft)
409-
else()
410-
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
411-
target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hipfft)
412-
endif()
413-
else()
414-
find_package(FFTW REQUIRED)
415-
target_link_libraries(${ARGS_TARGET} PRIVATE FFTW::FFTW)
410+
target_compile_definitions(${a_target} PRIVATE MFC_MPI)
411+
target_link_libraries (${a_target} PRIVATE MPI::MPI_Fortran)
416412
endif()
417-
endif()
418413

419-
if (MFC_OpenACC AND ARGS_OpenACC)
420-
find_package(OpenACC)
414+
if (ARGS_SILO)
415+
find_package(SILO REQUIRED)
416+
target_link_libraries(${a_target} PRIVATE SILO::SILO)
417+
endif()
421418

422-
# This should be equivalent to if (NOT OpenACC_FC_FOUND)
423-
if (NOT TARGET OpenACC::OpenACC_Fortran)
424-
message(FATAL_ERROR "OpenACC + Fortran is unsupported.")
419+
if (ARGS_HDF5)
420+
find_package(HDF5 REQUIRED)
421+
target_link_libraries(${a_target} PRIVATE HDF5::HDF5)
425422
endif()
426423

427-
target_link_libraries(${ARGS_TARGET} PRIVATE OpenACC::OpenACC_Fortran)
428-
target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_OpenACC)
429-
430-
if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
431-
# FIXME: This should work with other cards than gfx90a ones.
432-
target_compile_options(${ARGS_TARGET} PRIVATE
433-
"-foffload=amdgcn-amdhsa='-march=gfx90a'"
434-
"-foffload-options=-lgfortran\ -lm"
435-
"-fno-exceptions")
436-
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
437-
find_package(cuTENSOR)
438-
if (NOT cuTENSOR_FOUND)
439-
message(WARNING
440-
"Failed to locate the NVIDIA cuTENSOR library. MFC will be "
441-
"built without support for it, disallowing the use of "
442-
"cu_tensor=T. This can result in degraded performance.")
424+
if (ARGS_FFTW)
425+
if (MFC_OpenACC AND ARGS_OpenACC)
426+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
427+
find_package(CUDAToolkit REQUIRED)
428+
target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft)
429+
else()
430+
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
431+
target_link_libraries(${a_target} PRIVATE hipfort::hipfft)
432+
endif()
443433
else()
444-
target_link_libraries (${ARGS_TARGET} PRIVATE cuTENSOR::cuTENSOR)
445-
target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_cuTENSOR)
434+
find_package(FFTW REQUIRED)
435+
target_link_libraries(${a_target} PRIVATE FFTW::FFTW)
446436
endif()
437+
endif()
447438

448-
foreach (cc ${MFC_CUDA_CC})
449-
target_compile_options(${ARGS_TARGET}
450-
PRIVATE -gpu=cc${cc}
451-
)
452-
endforeach()
453-
454-
target_compile_options(${ARGS_TARGET}
455-
PRIVATE -gpu=keep,ptxinfo,lineinfo
456-
)
439+
if (MFC_OpenACC AND ARGS_OpenACC)
440+
find_package(OpenACC)
457441

458-
# GH-200 Unified Memory Support
459-
if (MFC_Unified)
460-
target_compile_options(${ARGS_TARGET}
461-
PRIVATE -gpu=unified
462-
)
463-
# "This option must appear in both the compile and link lines" -- NVHPC Docs
464-
target_link_options(${ARGS_TARGET}
465-
PRIVATE -gpu=unified
466-
)
442+
# This should be equivalent to if (NOT OpenACC_FC_FOUND)
443+
if (NOT TARGET OpenACC::OpenACC_Fortran)
444+
message(FATAL_ERROR "OpenACC + Fortran is unsupported.")
467445
endif()
468446

469-
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
470-
target_compile_options(${ARGS_TARGET}
471-
PRIVATE -gpu=autocompare,debug
447+
target_link_libraries(${a_target} PRIVATE OpenACC::OpenACC_Fortran)
448+
target_compile_definitions(${a_target} PRIVATE MFC_OpenACC)
449+
450+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
451+
# FIXME: This should work with other cards than gfx90a ones.
452+
target_compile_options(${a_target} PRIVATE
453+
"-foffload=amdgcn-amdhsa='-march=gfx90a'"
454+
"-foffload-options=-lgfortran\ -lm"
455+
"-fno-exceptions")
456+
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
457+
find_package(cuTENSOR)
458+
if (NOT cuTENSOR_FOUND)
459+
message(WARNING
460+
"Failed to locate the NVIDIA cuTENSOR library. MFC will be "
461+
"built without support for it, disallowing the use of "
462+
"cu_tensor=T. This can result in degraded performance.")
463+
else()
464+
target_link_libraries (${a_target} PRIVATE cuTENSOR::cuTENSOR)
465+
target_compile_definitions(${a_target} PRIVATE MFC_cuTENSOR)
466+
endif()
467+
468+
foreach (cc ${MFC_CUDA_CC})
469+
target_compile_options(${a_target}
470+
PRIVATE -gpu=cc${cc}
471+
)
472+
endforeach()
473+
474+
target_compile_options(${a_target}
475+
PRIVATE -gpu=keep,ptxinfo,lineinfo
472476
)
477+
478+
# GH-200 Unified Memory Support
479+
if (MFC_Unified)
480+
target_compile_options(${ARGS_TARGET}
481+
PRIVATE -gpu=unified
482+
)
483+
# "This option must appear in both the compile and link lines" -- NVHPC Docs
484+
target_link_options(${ARGS_TARGET}
485+
PRIVATE -gpu=unified
486+
)
487+
endif()
488+
489+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
490+
target_compile_options(${a_target}
491+
PRIVATE -gpu=autocompare,debug
492+
)
493+
endif()
494+
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
495+
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
496+
target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
473497
endif()
474-
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
475-
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
476-
target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
498+
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
499+
target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
477500
endif()
478-
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
479-
target_compile_options(${ARGS_TARGET} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
480-
endif()
481501

482-
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
483-
find_package(CUDAToolkit REQUIRED)
484-
target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::nvToolsExt)
485-
endif()
502+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
503+
find_package(CUDAToolkit REQUIRED)
504+
target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt)
505+
endif()
506+
endforeach()
486507

487508
install(TARGETS ${ARGS_TARGET} RUNTIME DESTINATION bin)
488509
endfunction()

src/common/include/inline_conversions.fpp

Lines changed: 0 additions & 57 deletions
This file was deleted.

src/common/m_helper_basic.f90

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ module m_helper_basic
2525
!! @param tol_input Relative error (default = 1d-6).
2626
!! @return Result of the comparison.
2727
logical function f_approx_equal(a, b, tol_input) result(res)
28+
!$acc routine seq
2829
! Reference: https://floating-point-gui.de/errors/comparison/
2930

3031
real(kind(0d0)), intent(in) :: a, b

0 commit comments

Comments
 (0)