Skip to content

Commit 68e62d4

Browse files
authored
Merge branch 'master' into add-modular-precision-update
2 parents 6986aac + 78a810f commit 68e62d4

25 files changed

+474
-939
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,11 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
154154
endif()
155155
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
156156
add_compile_options(
157-
"SHELL:-h nomessage=296:878:1391:1069"
157+
"SHELL:-M 296,878,1391,1069,5025"
158158
"SHELL:-h static" "SHELL:-h keepfiles"
159159
"SHELL:-h acc_model=auto_async_none"
160160
"SHELL: -h acc_model=no_fast_addr"
161-
"SHELL: -h list=adm" "-DCRAY_ACC_SIMPLIFY" "-DCRAY_ACC_WAR"
161+
"SHELL: -h list=adm"
162162
)
163163

164164
add_link_options("SHELL:-hkeepfiles")

docs/documentation/expectedPerformance.md

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,37 +30,38 @@ Note:
3030
| NVIDIA V100 | | GPU | 1 GPU | 0.99 | NVHPC 22.11 | GT Phoenix |
3131
| NVIDIA A30 | | GPU | 1 GPU | 1.1 | NVHPC 24.1 | GT Rogues Gallery |
3232
| AMD MI250X | | GPU | 1 _GCD_* | 1.1 | CCE 16.0.1 | OLCF Frontier |
33-
| AMD EPYC 9965 | Turin | CPU | 192 cores | 1.2 | AOCC 5.0.0 | AMD Volcano |
33+
| AMD EPYC 9965 | Turin, Zen5c | CPU | 192 cores | 1.2 | AOCC 5.0.0 | AMD Volcano |
3434
| AMD MI100 | | GPU | 1 GPU | 1.4 | CCE 16.0.1 | Cray internal system |
35+
| AMD EPYC 9755 | Turin, Zen5 | CPU | 128 cores | 1.4 | AOCC 5.0.0 | AMD Volcano |
3536
| Intel Xeon 6980P | Granite Rapids | CPU | 128 cores | 1.4 | Intel 2024.2 | Intel Endeavour |
3637
| NVIDIA L40S | FP32-only GPU | GPU | 1 GPU | 1.7 | NVHPC 24.5 | GT ICE |
37-
| AMD EPYC 9654 | Genoa | CPU | 96 cores | 1.7 | Intel 2021.9 | DOD Carpenter |
38+
| AMD EPYC 9654 | Genoa, Zen4 | CPU | 96 cores | 1.7 | Intel 2021.9 | DOD Carpenter |
3839
| Intel Xeon 6960P | Granite Rapids | CPU | 72 cores | 1.7 | Intel 2024.2 | Intel AI Cloud |
3940
| NVIDIA P100 | | GPU | 1 GPU | 2.4 | NVHPC 23.5 | GT CSE Internal |
4041
| Intel Xeon 8592+ | Emerald Rapids | CPU | 64 cores | 2.6 | Intel 2024.2 | Intel AI Cloud |
41-
| Intel Xeon 6900E | Sierra Forest Advanced, 2.8GHz Boost, 384 MiB L3 | CPU | 192 cores | 2.6 | Intel 2024.2 | Intel AI Cloud |
42-
| AMD EPYC 9534 | Genoa | CPU | 64 cores | 2.7 | GNU 12.3.0 | GT Phoenix |
42+
| Intel Xeon 6900E | Sierra Forest Adv., 2.8GHz Boost, 384 MiB L3 | CPU | 192 cores | 2.6 | Intel 2024.2 | Intel AI Cloud |
43+
| AMD EPYC 9534 | Genoa, Zen4 | CPU | 64 cores | 2.7 | GNU 12.3.0 | GT Phoenix |
4344
| NVIDIA A40 | FP32-only GPU | GPU | 1 GPU | 3.3 | NVHPC 22.11 | NCSA Delta |
4445
| Intel Xeon Max 9468 | Sapphire Rapids HBM | CPU | 48 cores | 3.5 | NVHPC 24.5 | GT Rogues Gallery |
4546
| NVIDIA Grace CPU | Arm, Neoverse V2 | CPU | 72 cores | 3.7 | NVHPC 24.1 | GT Rogues Gallery |
4647
| NVIDIA RTX6000 | FP32-only GPU | GPU | 1 GPU | 3.9 | NVHPC 22.11 | GT Phoenix |
47-
| AMD EPYC 7763 | Milan | CPU | 64 cores | 4.1 | GNU 11.4.0 | NCSA Delta |
48+
| AMD EPYC 7763 | Milan, Zen3 | CPU | 64 cores | 4.1 | GNU 11.4.0 | NCSA Delta |
4849
| Intel Xeon 6740E | Sierra Forest | CPU | 92 cores | 4.2 | Intel 2024.2 | Intel AI Cloud |
4950
| NVIDIA A10 | FP32-only GPU | GPU | 1 GPU | 4.3 | NVHPC 24.1 | TAMU Faster |
50-
| AMD EPYC 7713 | Milan | CPU | 64 cores | 5.0 | GNU 12.3.0 | GT Phoenix |
51+
| AMD EPYC 7713 | Milan, Zen3 | CPU | 64 cores | 5.0 | GNU 12.3.0 | GT Phoenix |
5152
| Intel Xeon 8480CL | Sapphire Rapids | CPU | 56 cores | 5.0 | NVHPC 24.5 | GT Phoenix |
5253
| Intel Xeon 6454S | Sapphire Rapids | CPU | 32 cores | 5.6 | NVHPC 24.5 | GT Rogues Gallery |
5354
| Intel Xeon 8462Y+ | Sapphire Rapids | CPU | 32 cores | 6.2 | GNU 12.3.0 | GT ICE |
5455
| Intel Xeon 6548Y+ | Emerald Rapids | CPU | 32 cores | 6.6 | Intel 2021.9 | GT ICE |
5556
| Intel Xeon 8352Y | Ice Lake | CPU | 32 cores | 6.6 | NVHPC 24.5 | GT Rogues Gallery |
5657
| Ampere Altra Q80-28 | Arm, Neoverse-N1 | CPU | 80 cores | 6.8 | GNU 12.2.0 | OLCF Wombat |
57-
| AMD EPYC 7513 | Milan | CPU | 32 cores | 7.4 | GNU 12.3.0 | GT ICE |
58+
| AMD EPYC 7513 | Milan, Zen3 | CPU | 32 cores | 7.4 | GNU 12.3.0 | GT ICE |
5859
| Intel Xeon 8268 | Cascade Lake | CPU | 24 cores | 7.5 | Intel 2024.2 | TAMU ACES |
59-
| AMD EPYC 7452 | Rome | CPU | 32 cores | 8.4 | GNU 12.3.0 | GT ICE |
60+
| AMD EPYC 7452 | Rome, Zen2 | CPU | 32 cores | 8.4 | GNU 12.3.0 | GT ICE |
6061
| NVIDIA T4 | FP32-only GPU | GPU | 1 GPU | 8.8 | NVHPC 24.1 | TAMU Faster |
6162
| Intel Xeon 8160 | Skylake | CPU | 24 cores | 8.9 | Intel 2024.0 | TACC Stampede3 |
6263
| IBM Power10 | | CPU | 24 cores | 10 | GNU 13.3.1 | GT Rogues Gallery |
63-
| AMD EPYC 7401 | Naples | CPU | 24 cores | 10 | GNU 10.3.1 | LLNL Corona |
64+
| AMD EPYC 7401 | Naples, Zen(1) | CPU | 24 cores | 10 | GNU 10.3.1 | LLNL Corona |
6465
| Intel Xeon 6226 | Cascade Lake | CPU | 12 cores | 17 | GNU 12.3.0 | GT ICE |
6566
| Apple M1 Max | | CPU | 10 cores | 20 | GNU 14.1.0 | N/A |
6667
| IBM Power9 | | CPU | 20 cores | 21 | GNU 9.1.0 | OLCF Summit |

src/common/include/macros.fpp

Lines changed: 7 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -13,71 +13,31 @@
1313
#:def ALLOCATE(*args)
1414
@:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
1515
allocate (${', '.join(args)}$)
16-
#ifndef CRAY_ACC_WAR
17-
!$acc enter data create(${', '.join(args)}$)
18-
#endif
16+
!$acc enter data create(${', '.join(args)}$)
1917
#:enddef ALLOCATE
2018

2119
#:def DEALLOCATE(*args)
2220
@:LOG({'@:DEALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
2321
deallocate (${', '.join(args)}$)
24-
#ifndef CRAY_ACC_WAR
25-
!$acc exit data delete(${', '.join(args)}$)
26-
#endif
22+
!$acc exit data delete(${', '.join(args)}$)
2723
#:enddef DEALLOCATE
2824

2925
#:def ALLOCATE_GLOBAL(*args)
3026
@:LOG({'@:ALLOCATE_GLOBAL(${re.sub(' +', ' ', ', '.join(args))}$)'})
31-
#ifdef CRAY_ACC_WAR
32-
allocate (${', '.join(('p_' + arg.strip() for arg in args))}$)
33-
#:for arg in args
34-
${re.sub('\\(.*\\)','',arg)}$ => ${ 'p_' + re.sub('\\(.*\\)','',arg.strip()) }$
35-
#:endfor
36-
!$acc enter data create(${', '.join(('p_' + re.sub('\\(.*\\)','',arg.strip()) for arg in args))}$) &
37-
!$acc& attach(${', '.join(map(lambda x: re.sub('\\(.*\\)','',x), args))}$)
38-
#else
27+
3928
allocate (${', '.join(args)}$)
40-
!$acc enter data create(${', '.join(args)}$)
41-
#endif
4229

4330
#:enddef ALLOCATE_GLOBAL
4431

4532
#:def DEALLOCATE_GLOBAL(*args)
4633
@:LOG({'@:DEALLOCATE_GLOBAL(${re.sub(' +', ' ', ', '.join(args))}$)'})
47-
#ifdef CRAY_ACC_WAR
48-
!$acc exit data delete(${', '.join(('p_' + arg.strip() for arg in args))}$) &
49-
!$acc& detach(${', '.join(args)}$)
50-
#:for arg in args
51-
nullify (${arg}$)
52-
#:endfor
53-
deallocate (${', '.join(('p_' + arg.strip() for arg in args))}$)
54-
#else
34+
5535
deallocate (${', '.join(args)}$)
56-
!$acc exit data delete(${', '.join(args)}$)
57-
#endif
5836

5937
#:enddef DEALLOCATE_GLOBAL
6038

61-
#:def CRAY_DECLARE_GLOBAL(intype, dim, *args)
62-
#ifdef CRAY_ACC_WAR
63-
${intype}$, ${dim}$, allocatable, target :: ${', '.join(('p_' + arg.strip() for arg in args))}$
64-
${intype}$, ${dim}$, pointer :: ${', '.join(args)}$
65-
#else
66-
${intype}$, ${dim}$, allocatable :: ${', '.join(args)}$
67-
#endif
68-
#:enddef CRAY_DECLARE_GLOBAL
69-
70-
#:def CRAY_DECLARE_GLOBAL_SCALAR(intype, *args)
71-
#ifdef CRAY_ACC_WAR
72-
${intype}$, target :: ${', '.join(('p_' + arg.strip() for arg in args))}$
73-
${intype}$, pointer :: ${', '.join(args)}$
74-
#else
75-
${intype}$::${', '.join(args)}$
76-
#endif
77-
#:enddef CRAY_DECLARE_GLOBAL_SCALAR
78-
7939
#:def ACC_SETUP_VFs(*args)
80-
#ifdef CRAY_ACC_WAR
40+
#ifdef _CRAYFTN
8141
block
8242
integer :: macros_setup_vfs_i
8343

@@ -100,7 +60,7 @@
10060
#:enddef
10161

10262
#:def ACC_SETUP_SFs(*args)
103-
#ifdef CRAY_ACC_WAR
63+
#ifdef _CRAYFTN
10464
block
10565

10666
@:LOG({'@:ACC_SETUP_SFs(${', '.join(args)}$)'})
@@ -116,7 +76,7 @@
11676
#:enddef
11777

11878
#:def ACC_SETUP_source_spatials(*args)
119-
#ifdef CRAY_ACC_WAR
79+
#ifdef _CRAYFTN
12080
block
12181

12282
@:LOG({'@:ACC_SETUP_source_spatials(${', '.join(args)}$)'})

src/common/m_phase_change.fpp

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -34,21 +34,6 @@ module m_phase_change
3434
s_infinite_relaxation_k, &
3535
s_finalize_relaxation_solver_module
3636

37-
!> @name Abstract interface for creating function pointers
38-
!> @{
39-
abstract interface
40-
41-
!> @name Abstract subroutine for the infinite relaxation solver
42-
!> @{
43-
subroutine s_abstract_relaxation_solver(q_cons_vf)
44-
import :: scalar_field, sys_size
45-
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
46-
end subroutine
47-
!> @}
48-
49-
end interface
50-
!> @}
51-
5237
!> @name Parameters for the first order transition phase change
5338
!> @{
5439
integer, parameter :: max_iter = 1e8_wp !< max # of iterations
@@ -66,10 +51,18 @@ module m_phase_change
6651

6752
!$acc declare create(max_iter,pCr,TCr,mixM,lp,vp,A,B,C,D)
6853

69-
procedure(s_abstract_relaxation_solver), pointer :: s_relaxation_solver => null()
70-
7154
contains
7255

56+
!> This subroutine should dispatch to the correct relaxation solver based on
57+
!! some parameter. It replaces the procedure pointer, which CCE
58+
!! is breaking on.
59+
subroutine s_relaxation_solver(q_cons_vf)
60+
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
61+
! No solver is dispatched here because in current master the procedure pointer
62+
! was never assigned; the assertion below flags any call as an error
63+
@:ASSERT(.false., "s_relaxation_solver called but it currently does nothing")
64+
end subroutine s_relaxation_solver
65+
7366
!> The purpose of this subroutine is to initialize the phase change module
7467
!! by setting the parameters needed for phase change and
7568
!! selecting the phase change module that will be used
@@ -298,8 +291,9 @@ contains
298291
!! @param rhoe mixture energy
299292
!! @param TS equilibrium temperature at the interface
300293
subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, rM, q_cons_vf, rhoe, TS)
301-
#ifdef CRAY_ACC_WAR
302-
!DIR$ INLINEALWAYS s_compute_speed_of_sound
294+
295+
#ifdef _CRAYFTN
296+
!DIR$ INLINEALWAYS s_infinite_pt_relaxation_k
303297
#else
304298
!$acc routine seq
305299
#endif
@@ -403,7 +397,7 @@ contains
403397
!! @param TS equilibrium temperature at the interface
404398
subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS)
405399

406-
#ifdef CRAY_ACC_WAR
400+
#ifdef _CRAYFTN
407401
!DIR$ INLINEALWAYS s_infinite_ptg_relaxation_k
408402
#else
409403
!$acc routine seq
@@ -527,7 +521,8 @@ contains
527521
!! @param k generic loop iterator for y direction
528522
!! @param l generic loop iterator for z direction
529523
subroutine s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l)
530-
#ifdef CRAY_ACC_WAR
524+
525+
#ifdef _CRAYFTN
531526
!DIR$ INLINEALWAYS s_correct_partial_densities
532527
#else
533528
!$acc routine seq
@@ -590,7 +585,7 @@ contains
590585
!! @param TJac Transpose of the Jacobian Matrix
591586
subroutine s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac)
592587
593-
#ifdef CRAY_ACC_WAR
588+
#ifdef _CRAYFTN
594589
!DIR$ INLINEALWAYS s_compute_jacobian_matrix
595590
#else
596591
!$acc routine seq
@@ -697,7 +692,7 @@ contains
697692
!! @param R2D (2D) residue array
698693
subroutine s_compute_pTg_residue(j, k, l, mCPD, mCVGP, mQD, q_cons_vf, pS, rhoe, R2D)
699694
700-
#ifdef CRAY_ACC_WAR
695+
#ifdef _CRAYFTN
701696
!DIR$ INLINEALWAYS s_compute_pTg_residue
702697
#else
703698
!$acc routine seq
@@ -747,8 +742,9 @@ contains
747742
!! @param TSat Saturation Temperature
748743
!! @param TSIn equilibrium Temperature
749744
subroutine s_TSat(pSat, TSat, TSIn)
750-
#ifdef CRAY_ACC_WAR
751-
!DIR$ INLINEALWAYS s_compute_speed_of_sound
745+
746+
#ifdef _CRAYFTN
747+
!DIR$ INLINEALWAYS s_TSat
752748
#else
753749
!$acc routine seq
754750
#endif

0 commit comments

Comments
 (0)