Skip to content

Commit 6b834c7

Browse files
committed
test suite works with --unified and --no-unified
1 parent c110e9e commit 6b834c7

File tree

1 file changed

+62
-41
lines changed

1 file changed

+62
-41
lines changed

src/simulation/m_time_steppers.fpp

Lines changed: 62 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ contains
9494
use hipfort_check
9595
use openacc
9696
#endif
97-
9897
integer :: i, j !< Generic loop iterators
9998

10099
! Setting number of time-stages for selected time-stepping scheme
@@ -112,10 +111,6 @@ contains
112111
end do
113112

114113
#ifdef FRONTIER_UNIFIED
115-
if (proc_rank == 0 .and. num_ts /= 2) then
116-
call s_mpi_abort("Frontier unified memory assumes time_stepper = 2/3")
117-
end if
118-
119114
! Allocate to memory regions using hip calls
120115
! that we will attach pointers to
121116
do i = 1, 3
@@ -133,17 +128,21 @@ contains
133128
! CCE see it can access this and will leave it on the host. It will stay on the host so long as HSA_XNACK=1
134129
! NOTE: WE CANNOT DO ATOMICS INTO THIS MEMORY. We have to change a property to use atomics here
135130
! Otherwise leaving this as fine-grained will actually help performance since it can't be cached in GPU L2
136-
call hipCheck(hipMallocManaged(q_cons_ts_pool_host, dims8=pool_dims, lbounds8=pool_starts, flags=hipMemAttachGlobal))
131+
if (num_ts == 2) then
132+
call hipCheck(hipMallocManaged(q_cons_ts_pool_host, dims8=pool_dims, lbounds8=pool_starts, flags=hipMemAttachGlobal))
133+
end if
137134

138135
do j = 1, sys_size
139136
! q_cons_ts(1) lives on the device
140137
q_cons_ts(1)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, &
141138
idwbuff(2)%beg:idwbuff(2)%end, &
142139
idwbuff(3)%beg:idwbuff(3)%end) => q_cons_ts_pool_device(:, :, :, j)
143-
! q_cons_ts(2) lives on the host
144-
q_cons_ts(2)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, &
145-
idwbuff(2)%beg:idwbuff(2)%end, &
146-
idwbuff(3)%beg:idwbuff(3)%end) => q_cons_ts_pool_host(:, :, :, j)
140+
if (num_ts == 2) then
141+
! q_cons_ts(2) lives on the host
142+
q_cons_ts(2)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, &
143+
idwbuff(2)%beg:idwbuff(2)%end, &
144+
idwbuff(3)%beg:idwbuff(3)%end) => q_cons_ts_pool_host(:, :, :, j)
145+
end if
147146
end do
148147

149148
do i = 1, num_ts
@@ -520,6 +519,7 @@ contains
520519
real(wp), intent(inout) :: time_avg
521520

522521
integer :: i, j, k, l, q!< Generic loop iterator
522+
integer :: dest
523523
real(wp) :: start, finish
524524

525525
! Stage 1 of 2
@@ -565,6 +565,8 @@ contains
565565
end do
566566
end do
567567
end do
568+
569+
dest = 1 ! Result in q_cons_ts(1)%vf
568570
#else
569571
$:GPU_PARALLEL_LOOP(collapse=4)
570572
do i = 1, sys_size
@@ -578,6 +580,8 @@ contains
578580
end do
579581
end do
580582
end do
583+
584+
dest = 2 ! Result in q_cons_ts(2)%vf
581585
#endif
582586

583587
!Evolve pb and mv for non-polytropic qbmm
@@ -615,21 +619,21 @@ contains
615619
end do
616620
end if
617621

618-
if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt)
622+
if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt)
619623

620-
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf)
624+
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
621625

622626
if (model_eqns == 3 .and. (.not. relax)) then
623-
call s_pressure_relaxation_procedure(q_cons_ts(2)%vf)
627+
call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
624628
end if
625629

626-
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf)
630+
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
627631

628632
if (ib) then
629633
if (qbmm .and. .not. polytropic) then
630-
call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
634+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
631635
else
632-
call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf)
636+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
633637
end if
634638
end if
635639

@@ -656,6 +660,8 @@ contains
656660
end do
657661
end do
658662
end do
663+
664+
dest = 1 ! Result in q_cons_ts(1)%vf
659665
#else
660666
$:GPU_PARALLEL_LOOP(collapse=4)
661667
do i = 1, sys_size
@@ -670,6 +676,8 @@ contains
670676
end do
671677
end do
672678
end do
679+
680+
dest = 1 ! Result in q_cons_ts(1)%vf
673681
#endif
674682

675683
if (qbmm .and. (.not. polytropic)) then
@@ -708,21 +716,21 @@ contains
708716
end do
709717
end if
710718

711-
if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp)
719+
if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp)
712720

713-
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf)
721+
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
714722

715723
if (model_eqns == 3 .and. (.not. relax)) then
716-
call s_pressure_relaxation_procedure(q_cons_ts(1)%vf)
724+
call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
717725
end if
718726

719-
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf)
727+
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
720728

721729
if (ib) then
722730
if (qbmm .and. .not. polytropic) then
723-
call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf)
731+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf)
724732
else
725-
call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf)
733+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
726734
end if
727735
end if
728736

@@ -740,6 +748,7 @@ contains
740748
real(wp), intent(INOUT) :: time_avg
741749

742750
integer :: i, j, k, l, q !< Generic loop iterator
751+
integer :: dest
743752

744753
real(wp) :: start, finish
745754

@@ -787,6 +796,8 @@ contains
787796
end do
788797
end do
789798
end do
799+
800+
dest = 1 ! result in q_cons_ts(1)%vf
790801
#else
791802
$:GPU_PARALLEL_LOOP(collapse=4)
792803
do i = 1, sys_size
@@ -800,6 +811,8 @@ contains
800811
end do
801812
end do
802813
end do
814+
815+
dest = 2 ! result in q_cons_ts(2)%vf
803816
#endif
804817

805818
!Evolve pb and mv for non-polytropic qbmm
@@ -837,21 +850,21 @@ contains
837850
end do
838851
end if
839852

840-
if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt)
853+
if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt)
841854

842-
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf)
855+
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
843856

844857
if (model_eqns == 3 .and. (.not. relax)) then
845-
call s_pressure_relaxation_procedure(q_cons_ts(2)%vf)
858+
call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
846859
end if
847860

848-
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf)
861+
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
849862

850863
if (ib) then
851864
if (qbmm .and. .not. polytropic) then
852-
call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
865+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
853866
else
854-
call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf)
867+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
855868
end if
856869
end if
857870

@@ -878,6 +891,8 @@ contains
878891
end do
879892
end do
880893
end do
894+
895+
dest = 1 ! Result in q_cons_ts(1)%vf
881896
#else
882897
$:GPU_PARALLEL_LOOP(collapse=4)
883898
do i = 1, sys_size
@@ -892,6 +907,8 @@ contains
892907
end do
893908
end do
894909
end do
910+
911+
dest = 2 ! Result in q_cons_ts(2)%vf
895912
#endif
896913

897914
if (qbmm .and. (.not. polytropic)) then
@@ -930,21 +947,21 @@ contains
930947
end do
931948
end if
932949

933-
if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, dt/4._wp)
950+
if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, dt/4._wp)
934951

935-
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(2)%vf)
952+
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
936953

937954
if (model_eqns == 3 .and. (.not. relax)) then
938-
call s_pressure_relaxation_procedure(q_cons_ts(2)%vf)
955+
call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
939956
end if
940957

941-
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2)%vf)
958+
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
942959

943960
if (ib) then
944961
if (qbmm .and. .not. polytropic) then
945-
call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
962+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(2)%sf, mv_ts(2)%sf)
946963
else
947-
call s_ibm_correct_state(q_cons_ts(2)%vf, q_prim_vf)
964+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
948965
end if
949966
end if
950967

@@ -971,6 +988,8 @@ contains
971988
end do
972989
end do
973990
end do
991+
992+
dest = 1 ! Result in q_cons_ts(1)%vf
974993
#else
975994
$:GPU_PARALLEL_LOOP(collapse=4)
976995
do i = 1, sys_size
@@ -985,6 +1004,8 @@ contains
9851004
end do
9861005
end do
9871006
end do
1007+
1008+
dest = 1 ! Result in q_cons_ts(2)%vf
9881009
#endif
9891010

9901011
if (qbmm .and. (.not. polytropic)) then
@@ -1023,25 +1044,25 @@ contains
10231044
end do
10241045
end if
10251046

1026-
if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp)
1047+
if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest)%vf, q_prim_vf, rhs_vf, 2._wp*dt/3._wp)
10271048

1028-
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(1)%vf)
1049+
if (grid_geometry == 3) call s_apply_fourier_filter(q_cons_ts(dest)%vf)
10291050

10301051
if (model_eqns == 3 .and. (.not. relax)) then
1031-
call s_pressure_relaxation_procedure(q_cons_ts(1)%vf)
1052+
call s_pressure_relaxation_procedure(q_cons_ts(dest)%vf)
10321053
end if
10331054

10341055
call nvtxStartRange("RHS-ELASTIC")
1035-
if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(1)%vf, q_prim_vf)
1056+
if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(dest)%vf, q_prim_vf)
10361057
call nvtxEndRange
10371058

1038-
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1)%vf)
1059+
if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest)%vf)
10391060

10401061
if (ib) then
10411062
if (qbmm .and. .not. polytropic) then
1042-
call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf)
1063+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf)
10431064
else
1044-
call s_ibm_correct_state(q_cons_ts(1)%vf, q_prim_vf)
1065+
call s_ibm_correct_state(q_cons_ts(dest)%vf, q_prim_vf)
10451066
end if
10461067
end if
10471068

0 commit comments

Comments
 (0)