Skip to content

Commit bfd14bf

Browse files
authored
Merge branch 'MFlowCode:master' into source
2 parents 3f2659d + 3aac2c3 commit bfd14bf

File tree

17 files changed

+292
-297
lines changed

17 files changed

+292
-297
lines changed

.github/workflows/bench.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ jobs:
5252
5353
- name: Generate & Post Comment
5454
run: |
55-
. ./mfc.sh load -c p -m g
56-
./mfc.sh bench_diff master/bench-${{ matrix.device }}.yaml pr/bench-${{ matrix.device }}.yaml
55+
(cd pr && . ./mfc.sh load -c p -m g)
56+
(cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)
5757
5858
- name: Archive Logs
5959
uses: actions/upload-artifact@v3

CMakeLists.txt

Lines changed: 130 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -172,16 +172,13 @@ elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_
172172
add_compile_options(
173173
$<$<COMPILE_LANGUAGE:Fortran>:-Mfreeform>
174174
$<$<COMPILE_LANGUAGE:Fortran>:-cpp>
175-
-Minfo=accel
175+
$<$<COMPILE_LANGUAGE:Fortran>:-Minfo=inline>
176+
$<$<COMPILE_LANGUAGE:Fortran>:-Minfo=accel>
176177
)
177178

178-
if (CMAKE_BUILD_TYPE STREQUAL "Release")
179-
add_compile_options(
180-
$<$<COMPILE_LANGUAGE:Fortran:-minline>
181-
)
182-
elseif (CMAKE_BUILD_TYPE STREQUAL "Debug")
179+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
183180
add_compile_options(
184-
$<$<COMPILE_LANGUAGE:Fortran:-O0>
181+
$<$<COMPILE_LANGUAGE:Fortran>:-O0>
185182
)
186183
endif()
187184

@@ -208,13 +205,22 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
208205
endif()
209206

210207
# Enable LTO/IPO if supported
211-
CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)
212-
if (SUPPORTS_IPO)
213-
message(STATUS "Enabled IPO / LTO")
214-
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
208+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
209+
if (MFC_Unified)
210+
message(STATUS "IPO is not available with NVHPC using Unified Memory")
211+
else()
212+
message(STATUS "Performing IPO using -Mextract followed by -Minline")
213+
set(NVHPC_USE_TWO_PASS_IPO TRUE)
214+
endif()
215215
else()
216-
message(STATUS "IPO / LTO is NOT available")
217-
endif()
216+
CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)
217+
if (SUPPORTS_IPO)
218+
message(STATUS "Enabled IPO / LTO")
219+
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
220+
else()
221+
message(STATUS "IPO / LTO is NOT available")
222+
endif()
223+
endif()
218224
endif()
219225

220226
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -365,124 +371,139 @@ function(MFC_SETUP_TARGET)
365371
cmake_parse_arguments(ARGS "OpenACC;MPI;SILO;HDF5;FFTW" "TARGET" "SOURCES" ${ARGN})
366372

367373
add_executable(${ARGS_TARGET} ${ARGS_SOURCES})
368-
369-
set_target_properties(${ARGS_TARGET} PROPERTIES Fortran_PREPROCESS ON)
370-
371-
target_include_directories(${ARGS_TARGET} PRIVATE
372-
"${CMAKE_SOURCE_DIR}/src/common"
373-
"${CMAKE_SOURCE_DIR}/src/common/include"
374-
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}")
375-
376-
if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
377-
target_include_directories(${ARGS_TARGET} PRIVATE
378-
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
374+
set(IPO_TARGETS ${ARGS_TARGET})
375+
# Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler.
376+
# A little hacky, but it *is* an edge-case for *one* compiler.
377+
if (NVHPC_USE_TWO_PASS_IPO)
378+
add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES})
379+
target_compile_options(${ARGS_TARGET}_lib PRIVATE
380+
$<$<COMPILE_LANGUAGE:Fortran>:-Mextract=lib:${ARGS_TARGET}_lib>
381+
$<$<COMPILE_LANGUAGE:Fortran>:-Minline>
382+
)
383+
add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib)
384+
target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib)
385+
list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib)
379386
endif()
380387

381-
string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER)
382-
target_compile_definitions(
383-
${ARGS_TARGET} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID}
384-
MFC_${${ARGS_TARGET}_UPPER}
385-
)
388+
foreach (a_target ${IPO_TARGETS})
389+
set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON)
386390

387-
if (MFC_MPI AND ARGS_MPI)
388-
find_package(MPI COMPONENTS Fortran REQUIRED)
391+
target_include_directories(${a_target} PRIVATE
392+
"${CMAKE_SOURCE_DIR}/src/common"
393+
"${CMAKE_SOURCE_DIR}/src/common/include"
394+
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}")
389395

390-
target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_MPI)
391-
target_link_libraries (${ARGS_TARGET} PRIVATE MPI::MPI_Fortran)
392-
endif()
396+
if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
397+
target_include_directories(${a_target} PRIVATE
398+
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
399+
endif()
393400

394-
if (ARGS_SILO)
395-
find_package(SILO REQUIRED)
396-
target_link_libraries(${ARGS_TARGET} PRIVATE SILO::SILO)
397-
endif()
401+
string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER)
402+
target_compile_definitions(
403+
${a_target} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID}
404+
MFC_${${ARGS_TARGET}_UPPER}
405+
)
398406

399-
if (ARGS_HDF5)
400-
find_package(HDF5 REQUIRED)
401-
target_link_libraries(${ARGS_TARGET} PRIVATE HDF5::HDF5)
402-
endif()
407+
if (MFC_MPI AND ARGS_MPI)
408+
find_package(MPI COMPONENTS Fortran REQUIRED)
403409

404-
if (ARGS_FFTW)
405-
if (MFC_OpenACC AND ARGS_OpenACC)
406-
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
407-
find_package(CUDAToolkit REQUIRED)
408-
target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft)
409-
else()
410-
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
411-
target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hipfft)
412-
endif()
413-
else()
414-
find_package(FFTW REQUIRED)
415-
target_link_libraries(${ARGS_TARGET} PRIVATE FFTW::FFTW)
410+
target_compile_definitions(${a_target} PRIVATE MFC_MPI)
411+
target_link_libraries (${a_target} PRIVATE MPI::MPI_Fortran)
416412
endif()
417-
endif()
418413

419-
if (MFC_OpenACC AND ARGS_OpenACC)
420-
find_package(OpenACC)
414+
if (ARGS_SILO)
415+
find_package(SILO REQUIRED)
416+
target_link_libraries(${a_target} PRIVATE SILO::SILO)
417+
endif()
421418

422-
# This should be equivalent to if (NOT OpenACC_FC_FOUND)
423-
if (NOT TARGET OpenACC::OpenACC_Fortran)
424-
message(FATAL_ERROR "OpenACC + Fortran is unsupported.")
419+
if (ARGS_HDF5)
420+
find_package(HDF5 REQUIRED)
421+
target_link_libraries(${a_target} PRIVATE HDF5::HDF5)
425422
endif()
426423

427-
target_link_libraries(${ARGS_TARGET} PRIVATE OpenACC::OpenACC_Fortran)
428-
target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_OpenACC)
429-
430-
if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
431-
# FIXME: This should work with other cards than gfx90a ones.
432-
target_compile_options(${ARGS_TARGET} PRIVATE
433-
"-foffload=amdgcn-amdhsa='-march=gfx90a'"
434-
"-foffload-options=-lgfortran\ -lm"
435-
"-fno-exceptions")
436-
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
437-
find_package(cuTENSOR)
438-
if (NOT cuTENSOR_FOUND)
439-
message(WARNING
440-
"Failed to locate the NVIDIA cuTENSOR library. MFC will be "
441-
"built without support for it, disallowing the use of "
442-
"cu_tensor=T. This can result in degraded performance.")
424+
if (ARGS_FFTW)
425+
if (MFC_OpenACC AND ARGS_OpenACC)
426+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
427+
find_package(CUDAToolkit REQUIRED)
428+
target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft)
429+
else()
430+
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
431+
target_link_libraries(${a_target} PRIVATE hipfort::hipfft)
432+
endif()
443433
else()
444-
target_link_libraries (${ARGS_TARGET} PRIVATE cuTENSOR::cuTENSOR)
445-
target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_cuTENSOR)
434+
find_package(FFTW REQUIRED)
435+
target_link_libraries(${a_target} PRIVATE FFTW::FFTW)
446436
endif()
437+
endif()
447438

448-
foreach (cc ${MFC_CUDA_CC})
449-
target_compile_options(${ARGS_TARGET}
450-
PRIVATE -gpu=cc${cc}
451-
)
452-
endforeach()
453-
454-
target_compile_options(${ARGS_TARGET}
455-
PRIVATE -gpu=keep,ptxinfo,lineinfo
456-
)
439+
if (MFC_OpenACC AND ARGS_OpenACC)
440+
find_package(OpenACC)
457441

458-
# GH-200 Unified Memory Support
459-
if (MFC_Unified)
460-
target_compile_options(${ARGS_TARGET}
461-
PRIVATE -gpu=unified
462-
)
463-
# "This option must appear in both the compile and link lines" -- NVHPC Docs
464-
target_link_options(${ARGS_TARGET}
465-
PRIVATE -gpu=unified
466-
)
442+
# This should be equivalent to if (NOT OpenACC_FC_FOUND)
443+
if (NOT TARGET OpenACC::OpenACC_Fortran)
444+
message(FATAL_ERROR "OpenACC + Fortran is unsupported.")
467445
endif()
468446

469-
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
470-
target_compile_options(${ARGS_TARGET}
471-
PRIVATE -gpu=autocompare,debug
447+
target_link_libraries(${a_target} PRIVATE OpenACC::OpenACC_Fortran)
448+
target_compile_definitions(${a_target} PRIVATE MFC_OpenACC)
449+
450+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
451+
# FIXME: This should work with other cards than gfx90a ones.
452+
target_compile_options(${a_target} PRIVATE
453+
"-foffload=amdgcn-amdhsa='-march=gfx90a'"
454+
"-foffload-options=-lgfortran\ -lm"
455+
"-fno-exceptions")
456+
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
457+
find_package(cuTENSOR)
458+
if (NOT cuTENSOR_FOUND)
459+
message(WARNING
460+
"Failed to locate the NVIDIA cuTENSOR library. MFC will be "
461+
"built without support for it, disallowing the use of "
462+
"cu_tensor=T. This can result in degraded performance.")
463+
else()
464+
target_link_libraries (${a_target} PRIVATE cuTENSOR::cuTENSOR)
465+
target_compile_definitions(${a_target} PRIVATE MFC_cuTENSOR)
466+
endif()
467+
468+
foreach (cc ${MFC_CUDA_CC})
469+
target_compile_options(${a_target}
470+
PRIVATE -gpu=cc${cc}
471+
)
472+
endforeach()
473+
474+
target_compile_options(${a_target}
475+
PRIVATE -gpu=keep,ptxinfo,lineinfo
472476
)
477+
478+
# GH-200 Unified Memory Support
479+
if (MFC_Unified)
480+
target_compile_options(${ARGS_TARGET}
481+
PRIVATE -gpu=unified
482+
)
483+
# "This option must appear in both the compile and link lines" -- NVHPC Docs
484+
target_link_options(${ARGS_TARGET}
485+
PRIVATE -gpu=unified
486+
)
487+
endif()
488+
489+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
490+
target_compile_options(${a_target}
491+
PRIVATE -gpu=autocompare,debug
492+
)
493+
endif()
494+
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
495+
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
496+
target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
473497
endif()
474-
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
475-
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
476-
target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
498+
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
499+
target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
477500
endif()
478-
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
479-
target_compile_options(${ARGS_TARGET} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
480-
endif()
481501

482-
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
483-
find_package(CUDAToolkit REQUIRED)
484-
target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::nvToolsExt)
485-
endif()
502+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
503+
find_package(CUDAToolkit REQUIRED)
504+
target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt)
505+
endif()
506+
endforeach()
486507

487508
install(TARGETS ${ARGS_TARGET} RUNTIME DESTINATION bin)
488509
endfunction()

README.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@
2424

2525
Welcome to the home of MFC!
2626
MFC simulates compressible multi-component and multi-phase flows, [amongst other things](#what-else-can-this-thing-do).
27-
MFC is written in Fortran and makes use of metaprogramming to keep the code short (about 20K lines).
27+
MFC is written in Fortran and uses metaprogramming to keep the code short (about 20K lines).
2828

2929
MFC is used on the latest leadership-class supercomputers.
3030
It scales <b>ideally to exascale</b>; [tens of thousands of GPUs on NVIDIA- and AMD-GPU machines](#is-this-really-exascale) on Oak Ridge Summit and Frontier.
31-
MFC is a SPEChpc benchmark candidate, part of the JSC JUPITER Early Access Program, and made use of OLCF Frontier and LLNL El Capitan early access systems.
31+
MFC is a SPEChpc benchmark candidate, part of the JSC JUPITER Early Access Program, and used OLCF Frontier and LLNL El Capitan early access systems.
3232

3333
Get in touch with <a href="mailto:[email protected]">Spencer</a> if you have questions!
3434
We have an [active Slack channel](https://join.slack.com/t/mflowcode/shared_invite/zt-y75wibvk-g~zztjknjYkK1hFgCuJxVw) and development team.
@@ -56,7 +56,7 @@ Another example is the high-Mach flow over an airfoil, shown below.
5656
You can navigate [to this webpage](https://mflowcode.github.io/documentation/md_getting-started.html) to get started using MFC!
5757
It's rather straightforward.
5858
We'll give a brief introduction here for macOS.
59-
Using [brew](https://brew.sh), install MFC's modest set of dependencies:
59+
Using [brew](https://brew.sh), install MFC's dependencies:
6060
```shell
6161
brew install wget python cmake gcc@14 mpich
6262
```
@@ -82,15 +82,15 @@ You can learn more about MFC's capabilities [via its documentation](https://mflo
8282

8383
The shock-droplet interaction case above was run via
8484
```shell
85-
./mfc.sh run ./examples/3d_shockdroplet/case.py -n 8
85+
./mfc.sh run -n $(nproc) ./examples/3d_shockdroplet/case.py
8686
```
8787
where the value passed to `-n` (here `$(nproc)`, the number of available cores) is the number of cores the example will run on.
88-
You can visualize the output data in `examples/3d_shockdroplet/silo_hdf5` via Paraview, Visit, or your other favorite software.
88+
You can visualize the output data in `examples/3d_shockdroplet/silo_hdf5` via Paraview, Visit, or your favorite software.
8989

9090
## Is this _really_ exascale?
9191

9292
[OLCF Frontier](https://www.olcf.ornl.gov/frontier/) is the first exascale supercomputer.
93-
The weak scaling of MFC on this machine is below, showing near-ideal utilization.
93+
The weak scaling of MFC on this machine shows near-ideal utilization.
9494

9595
<p align="center">
9696
<img src="docs/res/scaling.png" alt="Scaling" width="400"/>
@@ -158,7 +158,7 @@ They are organized below. Just click the drop-downs!
158158
* [Fypp](https://fypp.readthedocs.io/en/stable/fypp.html) metaprogramming for code readability, performance, and portability
159159
* Continuous Integration (CI)
160160
* \>100 Regression tests with each PR.
161-
* Performed with GNU, Intel, and NVIDIA compilers on NVIDIA and AMD GPUs.
161+
* Performed with GNU (GCC), Intel, Cray (CCE), and NVIDIA (NVHPC) compilers on NVIDIA and AMD GPUs.
162162
* Line-level test coverage reports via [Codecov](https://app.codecov.io/gh/MFlowCode/MFC) and `gcov`
163163
* Benchmarking to avoid performance regressions and identify speed-ups
164164
* Continuous Deployment (CD) of [website](https://mflowcode.github.io) and [API documentation](https://mflowcode.github.io/documentation/index.html)
@@ -201,7 +201,7 @@ If you use MFC, consider citing it as:
201201

202202
## License
203203

204-
Copyright 2021-2024 Spencer Bryngelson and Tim Colonius.
204+
Copyright 2021 Spencer Bryngelson and Tim Colonius.
205205
MFC is under the MIT license (see [LICENSE](LICENSE) for full text).
206206

207207
## Acknowledgements
@@ -210,6 +210,7 @@ Multiple federal sponsors have supported MFC development, including the US Depar
210210

211211
MFC computations have used many supercomputing systems. A partial list is below:
212212
* OLCF Frontier and Summit, and testbed systems Wombat, Crusher, and Spock (allocation CFD154, PI Bryngelson)
213+
* LLNL Lassen and El Capitan testbed system, Tioga
213214
* PSC Bridges(1/2), NCSA Delta, SDSC Comet and Expanse, Purdue Anvil, TACC Stampede(1-3), and TAMU ACES via ACCESS-CI (allocations TG-CTS120005 (PI Colonius) and TG-PHY210084 (PI Bryngelson))
214215
* DOD systems Onyx, Carpenter, and Nautilus via the DOD HPCMP program
215216
* Sandia National Labs systems Doom and Attaway and testbed systems Weaver and Vortex

0 commit comments

Comments
 (0)