@@ -172,16 +172,13 @@ elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_
172172 add_compile_options (
173173 $<$<COMPILE_LANGUAGE:Fortran>:-Mfreeform>
174174 $<$<COMPILE_LANGUAGE:Fortran>:-cpp>
175- -Minfo=accel
175+ $<$<COMPILE_LANGUAGE:Fortran>:-Minfo=inline>
176+ $<$<COMPILE_LANGUAGE:Fortran>:-Minfo=accel>
176177 )
177178
178- if (CMAKE_BUILD_TYPE STREQUAL "Release" )
179- add_compile_options (
180- $<$<COMPILE_LANGUAGE:Fortran:-minline>
181- )
182- elseif (CMAKE_BUILD_TYPE STREQUAL "Debug" )
179+ if (CMAKE_BUILD_TYPE STREQUAL "Debug" )
183180 add_compile_options (
184- $<$<COMPILE_LANGUAGE:Fortran:-O0>
181+ $<$<COMPILE_LANGUAGE:Fortran> :-O0>
185182 )
186183 endif ()
187184
@@ -208,13 +205,22 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
208205 endif ()
209206
210207 # Enable LTO/IPO if supported
211- CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)
212- if (SUPPORTS_IPO)
213- message (STATUS "Enabled IPO / LTO" )
214- set (CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE )
208+ if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" )
209+ if (MFC_Unified)
210+ message (STATUS "IPO is not available with NVHPC using Unified Memory" )
211+ else ()
212+ message (STATUS "Performing IPO using -Mextract followed by -Minline" )
213+ set (NVHPC_USE_TWO_PASS_IPO TRUE )
214+ endif ()
215215 else ()
216- message (STATUS "IPO / LTO is NOT available" )
217- endif ()
216+ CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)
217+ if (SUPPORTS_IPO)
218+ message (STATUS "Enabled IPO / LTO" )
219+ set (CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE )
220+ else ()
221+ message (STATUS "IPO / LTO is NOT available" )
222+ endif ()
223+ endif ()
218224endif ()
219225
220226if (CMAKE_BUILD_TYPE STREQUAL "Debug" )
@@ -365,124 +371,139 @@ function(MFC_SETUP_TARGET)
365371 cmake_parse_arguments (ARGS "OpenACC;MPI;SILO;HDF5;FFTW" "TARGET" "SOURCES" ${ARGN} )
366372
367373 add_executable (${ARGS_TARGET} ${ARGS_SOURCES} )
368-
369- set_target_properties (${ARGS_TARGET} PROPERTIES Fortran_PREPROCESS ON )
370-
371- target_include_directories (${ARGS_TARGET} PRIVATE
372- "${CMAKE_SOURCE_DIR} /src/common"
373- "${CMAKE_SOURCE_DIR} /src/common/include"
374- "${CMAKE_SOURCE_DIR} /src/${ARGS_TARGET} " )
375-
376- if (EXISTS "${CMAKE_SOURCE_DIR} /src/${ARGS_TARGET} /include" )
377- target_include_directories (${ARGS_TARGET} PRIVATE
378- "${CMAKE_SOURCE_DIR} /src/${ARGS_TARGET} /include" )
374+ set (IPO_TARGETS ${ARGS_TARGET} )
375+ # To perform IPO with the NVIDIA compiler, the build is split into an object "library" (compiled with -Mextract) and the "executable" (compiled with -Minline=lib:...).
376+ # A little hacky, but it *is* an edge case for *one* compiler.
377+ if (NVHPC_USE_TWO_PASS_IPO)
378+ add_library (${ARGS_TARGET} _lib OBJECT ${ARGS_SOURCES} )
379+ target_compile_options (${ARGS_TARGET} _lib PRIVATE
380+ $<$<COMPILE_LANGUAGE:Fortran>:-Mextract=lib:${ARGS_TARGET} _lib>
381+ $<$<COMPILE_LANGUAGE:Fortran>:-Minline>
382+ )
383+ add_dependencies (${ARGS_TARGET} ${ARGS_TARGET} _lib)
384+ target_compile_options (${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET} _lib)
385+ list (PREPEND IPO_TARGETS ${ARGS_TARGET} _lib)
379386 endif ()
380387
381- string (TOUPPER "${ARGS_TARGET} " ${ARGS_TARGET} _UPPER)
382- target_compile_definitions (
383- ${ARGS_TARGET} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID}
384- MFC_${${ARGS_TARGET} _UPPER}
385- )
388+ foreach (a_target ${IPO_TARGETS} )
389+ set_target_properties (${a_target} PROPERTIES Fortran_PREPROCESS ON )
386390
387- if (MFC_MPI AND ARGS_MPI)
388- find_package (MPI COMPONENTS Fortran REQUIRED)
391+ target_include_directories (${a_target} PRIVATE
392+ "${CMAKE_SOURCE_DIR} /src/common"
393+ "${CMAKE_SOURCE_DIR} /src/common/include"
394+ "${CMAKE_SOURCE_DIR} /src/${ARGS_TARGET} " )
389395
390- target_compile_definitions (${ARGS_TARGET} PRIVATE MFC_MPI)
391- target_link_libraries (${ARGS_TARGET} PRIVATE MPI::MPI_Fortran)
392- endif ()
396+ if (EXISTS "${CMAKE_SOURCE_DIR} /src/${ARGS_TARGET} /include" )
397+ target_include_directories (${a_target} PRIVATE
398+ "${CMAKE_SOURCE_DIR} /src/${ARGS_TARGET} /include" )
399+ endif ()
393400
394- if (ARGS_SILO)
395- find_package (SILO REQUIRED)
396- target_link_libraries (${ARGS_TARGET} PRIVATE SILO::SILO)
397- endif ()
401+ string (TOUPPER "${ARGS_TARGET} " ${ARGS_TARGET} _UPPER)
402+ target_compile_definitions (
403+ ${a_target} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID}
404+ MFC_${${ARGS_TARGET} _UPPER}
405+ )
398406
399- if (ARGS_HDF5)
400- find_package (HDF5 REQUIRED)
401- target_link_libraries (${ARGS_TARGET} PRIVATE HDF5::HDF5)
402- endif ()
407+ if (MFC_MPI AND ARGS_MPI)
408+ find_package (MPI COMPONENTS Fortran REQUIRED)
403409
404- if (ARGS_FFTW)
405- if (MFC_OpenACC AND ARGS_OpenACC)
406- if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
407- find_package (CUDAToolkit REQUIRED)
408- target_link_libraries (${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft)
409- else ()
410- find_package (hipfort COMPONENTS hipfft CONFIG REQUIRED)
411- target_link_libraries (${ARGS_TARGET} PRIVATE hipfort::hipfft)
412- endif ()
413- else ()
414- find_package (FFTW REQUIRED)
415- target_link_libraries (${ARGS_TARGET} PRIVATE FFTW::FFTW)
410+ target_compile_definitions (${a_target} PRIVATE MFC_MPI)
411+ target_link_libraries (${a_target} PRIVATE MPI::MPI_Fortran)
416412 endif ()
417- endif ()
418413
419- if (MFC_OpenACC AND ARGS_OpenACC)
420- find_package (OpenACC)
414+ if (ARGS_SILO)
415+ find_package (SILO REQUIRED)
416+ target_link_libraries (${a_target} PRIVATE SILO::SILO)
417+ endif ()
421418
422- # This should be equivalent to if (NOT OpenACC_FC_FOUND )
423- if ( NOT TARGET OpenACC::OpenACC_Fortran )
424- message (FATAL_ERROR "OpenACC + Fortran is unsupported." )
419+ if (ARGS_HDF5 )
420+ find_package (HDF5 REQUIRED )
421+ target_link_libraries ( ${a_target} PRIVATE HDF5::HDF5 )
425422 endif ()
426423
427- target_link_libraries (${ARGS_TARGET} PRIVATE OpenACC::OpenACC_Fortran)
428- target_compile_definitions (${ARGS_TARGET} PRIVATE MFC_OpenACC)
429-
430- if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" )
431- # FIXME: This should work with other cards than gfx90a ones.
432- target_compile_options (${ARGS_TARGET} PRIVATE
433- "-foffload=amdgcn-amdhsa='-march=gfx90a'"
434- "-foffload-options=-lgfortran\ -lm"
435- "-fno-exceptions" )
436- elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
437- find_package (cuTENSOR)
438- if (NOT cuTENSOR_FOUND)
439- message (WARNING
440- "Failed to locate the NVIDIA cuTENSOR library. MFC will be "
441- "built without support for it, disallowing the use of "
442- "cu_tensor=T. This can result in degraded performance." )
424+ if (ARGS_FFTW)
425+ if (MFC_OpenACC AND ARGS_OpenACC)
426+ if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
427+ find_package (CUDAToolkit REQUIRED)
428+ target_link_libraries (${a_target} PRIVATE CUDA::cudart CUDA::cufft)
429+ else ()
430+ find_package (hipfort COMPONENTS hipfft CONFIG REQUIRED)
431+ target_link_libraries (${a_target} PRIVATE hipfort::hipfft)
432+ endif ()
443433 else ()
444- target_link_libraries ( ${ARGS_TARGET} PRIVATE cuTENSOR::cuTENSOR )
445- target_compile_definitions (${ARGS_TARGET } PRIVATE MFC_cuTENSOR )
434+ find_package (FFTW REQUIRED )
435+ target_link_libraries (${a_target } PRIVATE FFTW::FFTW )
446436 endif ()
437+ endif ()
447438
448- foreach (cc ${MFC_CUDA_CC} )
449- target_compile_options (${ARGS_TARGET}
450- PRIVATE -gpu=cc${cc}
451- )
452- endforeach ()
453-
454- target_compile_options (${ARGS_TARGET}
455- PRIVATE -gpu=keep,ptxinfo,lineinfo
456- )
439+ if (MFC_OpenACC AND ARGS_OpenACC)
440+ find_package (OpenACC)
457441
458- # GH-200 Unified Memory Support
459- if (MFC_Unified)
460- target_compile_options (${ARGS_TARGET}
461- PRIVATE -gpu=unified
462- )
463- # "This option must appear in both the compile and link lines" -- NVHPC Docs
464- target_link_options (${ARGS_TARGET}
465- PRIVATE -gpu=unified
466- )
442+ # This should be equivalent to if (NOT OpenACC_FC_FOUND)
443+ if (NOT TARGET OpenACC::OpenACC_Fortran)
444+ message (FATAL_ERROR "OpenACC + Fortran is unsupported." )
467445 endif ()
468446
469- if (CMAKE_BUILD_TYPE STREQUAL "Debug" )
470- target_compile_options (${ARGS_TARGET}
471- PRIVATE -gpu=autocompare,debug
447+ target_link_libraries (${a_target} PRIVATE OpenACC::OpenACC_Fortran)
448+ target_compile_definitions (${a_target} PRIVATE MFC_OpenACC)
449+
450+ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" )
451+ # FIXME: This should work with cards other than gfx90a.
452+ target_compile_options (${a_target} PRIVATE
453+ "-foffload=amdgcn-amdhsa='-march=gfx90a'"
454+ "-foffload-options=-lgfortran\ -lm"
455+ "-fno-exceptions" )
456+ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
457+ find_package (cuTENSOR)
458+ if (NOT cuTENSOR_FOUND)
459+ message (WARNING
460+ "Failed to locate the NVIDIA cuTENSOR library. MFC will be "
461+ "built without support for it, disallowing the use of "
462+ "cu_tensor=T. This can result in degraded performance." )
463+ else ()
464+ target_link_libraries (${a_target} PRIVATE cuTENSOR::cuTENSOR)
465+ target_compile_definitions (${a_target} PRIVATE MFC_cuTENSOR)
466+ endif ()
467+
468+ foreach (cc ${MFC_CUDA_CC} )
469+ target_compile_options (${a_target}
470+ PRIVATE -gpu=cc${cc}
471+ )
472+ endforeach ()
473+
474+ target_compile_options (${a_target}
475+ PRIVATE -gpu=keep,ptxinfo,lineinfo
472476 )
477+
478+ # GH-200 Unified Memory Support
479+ if (MFC_Unified)
480+ target_compile_options (${ARGS_TARGET}
481+ PRIVATE -gpu=unified
482+ )
483+ # "This option must appear in both the compile and link lines" -- NVHPC Docs
484+ target_link_options (${ARGS_TARGET}
485+ PRIVATE -gpu=unified
486+ )
487+ endif ()
488+
489+ if (CMAKE_BUILD_TYPE STREQUAL "Debug" )
490+ target_compile_options (${a_target}
491+ PRIVATE -gpu=autocompare,debug
492+ )
493+ endif ()
494+ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray" )
495+ find_package (hipfort COMPONENTS hip CONFIG REQUIRED)
496+ target_link_libraries (${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
473497 endif ()
474- elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray" )
475- find_package (hipfort COMPONENTS hip CONFIG REQUIRED)
476- target_link_libraries (${ARGS_TARGET} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
498+ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray" )
499+ target_compile_options (${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc" )
477500 endif ()
478- elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray" )
479- target_compile_options (${ARGS_TARGET} PRIVATE "SHELL:-h noacc" "SHELL:-x acc" )
480- endif ()
481501
482- if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
483- find_package (CUDAToolkit REQUIRED)
484- target_link_libraries (${ARGS_TARGET} PRIVATE CUDA::nvToolsExt)
485- endif ()
502+ if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
503+ find_package (CUDAToolkit REQUIRED)
504+ target_link_libraries (${a_target} PRIVATE CUDA::nvToolsExt)
505+ endif ()
506+ endforeach ()
486507
487508 install (TARGETS ${ARGS_TARGET} RUNTIME DESTINATION bin)
488509endfunction ()
0 commit comments