@@ -94,7 +94,6 @@ contains
9494 use hipfort_check
9595 use openacc
9696#endif
97-
9897 integer :: i, j !< Generic loop iterators
9998
10099 ! Setting number of time- stages for selected time- stepping scheme
@@ -112,10 +111,6 @@ contains
112111 end do
113112
114113#ifdef FRONTIER_UNIFIED
115- if (proc_rank == 0 .and. num_ts /= 2 ) then
116- call s_mpi_abort(" Frontier unified memory assumes time_stepper = 2/3" )
117- end if
118-
119114 ! Allocate to memory regions using hip calls
120115 ! that we will attach pointers to
121116 do i = 1 , 3
@@ -133,17 +128,21 @@ contains
133128 ! CCE see it can access this and will leave it on the host. It will stay on the host so long as HSA_XNACK=1
134129 ! NOTE: WE CANNOT DO ATOMICS INTO THIS MEMORY. We have to change a property to use atomics here
135130 ! Otherwise leaving this as fine-grained will actually help performance since it can' t be cached in GPU L2
136- call hipCheck(hipMallocManaged(q_cons_ts_pool_host, dims8= pool_dims, lbounds8= pool_starts, flags= hipMemAttachGlobal))
131+ if (num_ts == 2 ) then
132+ call hipCheck(hipMallocManaged(q_cons_ts_pool_host, dims8= pool_dims, lbounds8= pool_starts, flags= hipMemAttachGlobal))
133+ end if
137134
138135 do j = 1 , sys_size
139136 ! q_cons_ts(1 ) lives on the device
140137 q_cons_ts(1 )%vf(j)%sf(idwbuff(1 )%beg:idwbuff(1 )%end, &
141138 idwbuff(2 )%beg:idwbuff(2 )%end, &
142139 idwbuff(3 )%beg:idwbuff(3 )%end) = > q_cons_ts_pool_device(:, :, :, j)
143- ! q_cons_ts(2 ) lives on the host
144- q_cons_ts(2 )%vf(j)%sf(idwbuff(1 )%beg:idwbuff(1 )%end, &
145- idwbuff(2 )%beg:idwbuff(2 )%end, &
146- idwbuff(3 )%beg:idwbuff(3 )%end) = > q_cons_ts_pool_host(:, :, :, j)
140+ if (num_ts == 2 ) then
141+ ! q_cons_ts(2 ) lives on the host
142+ q_cons_ts(2 )%vf(j)%sf(idwbuff(1 )%beg:idwbuff(1 )%end, &
143+ idwbuff(2 )%beg:idwbuff(2 )%end, &
144+ idwbuff(3 )%beg:idwbuff(3 )%end) = > q_cons_ts_pool_host(:, :, :, j)
145+ end if
147146 end do
148147
149148 do i = 1 , num_ts
@@ -520,6 +519,7 @@ contains
520519 real (wp), intent (inout ) :: time_avg
521520
522521 integer :: i, j, k, l, q!< Generic loop iterator
522+ integer :: dest
523523 real (wp) :: start, finish
524524
525525 ! Stage 1 of 2
@@ -565,6 +565,8 @@ contains
565565 end do
566566 end do
567567 end do
568+
569+ dest = 1 ! Result in q_cons_ts(1 )%vf
568570#else
569571 $:GPU_PARALLEL_LOOP(collapse= 4 )
570572 do i = 1 , sys_size
@@ -578,6 +580,8 @@ contains
578580 end do
579581 end do
580582 end do
583+
584+ dest = 2 ! Result in q_cons_ts(2 )%vf
581585#endif
582586
583587 !Evolve pb and mv for non- polytropic qbmm
@@ -615,21 +619,21 @@ contains
615619 end do
616620 end if
617621
618- if (bodyForces) call s_apply_bodyforces(q_cons_ts(2 )%vf, q_prim_vf, rhs_vf, dt)
622+ if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest )%vf, q_prim_vf, rhs_vf, dt)
619623
620- if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(2 )%vf)
624+ if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(dest )%vf)
621625
622626 if (model_eqns == 3 .and. (.not. relax)) then
623- call s_pressure_relaxation_procedure(q_cons_ts(2 )%vf)
627+ call s_pressure_relaxation_procedure(q_cons_ts(dest )%vf)
624628 end if
625629
626- if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2 )%vf)
630+ if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest )%vf)
627631
628632 if (ib) then
629633 if (qbmm .and. .not. polytropic) then
630- call s_ibm_correct_state(q_cons_ts(2 )%vf, q_prim_vf, pb_ts(2 )%sf, mv_ts(2 )%sf)
634+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf, pb_ts(2 )%sf, mv_ts(2 )%sf)
631635 else
632- call s_ibm_correct_state(q_cons_ts(2 )%vf, q_prim_vf)
636+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf)
633637 end if
634638 end if
635639
@@ -656,6 +660,8 @@ contains
656660 end do
657661 end do
658662 end do
663+
664+ dest = 1 ! Result in q_cons_ts(1 )%vf
659665#else
660666 $:GPU_PARALLEL_LOOP(collapse= 4 )
661667 do i = 1 , sys_size
@@ -670,6 +676,8 @@ contains
670676 end do
671677 end do
672678 end do
679+
680+ dest = 1 ! Result in q_cons_ts(1 )%vf
673681#endif
674682
675683 if (qbmm .and. (.not. polytropic)) then
@@ -708,21 +716,21 @@ contains
708716 end do
709717 end if
710718
711- if (bodyForces) call s_apply_bodyforces(q_cons_ts(1 )%vf, q_prim_vf, rhs_vf, 2._wp * dt/ 3._wp )
719+ if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest )%vf, q_prim_vf, rhs_vf, 2._wp * dt/ 3._wp )
712720
713- if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(1 )%vf)
721+ if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(dest )%vf)
714722
715723 if (model_eqns == 3 .and. (.not. relax)) then
716- call s_pressure_relaxation_procedure(q_cons_ts(1 )%vf)
724+ call s_pressure_relaxation_procedure(q_cons_ts(dest )%vf)
717725 end if
718726
719- if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1 )%vf)
727+ if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest )%vf)
720728
721729 if (ib) then
722730 if (qbmm .and. .not. polytropic) then
723- call s_ibm_correct_state(q_cons_ts(1 )%vf, q_prim_vf, pb_ts(1 )%sf, mv_ts(1 )%sf)
731+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf, pb_ts(1 )%sf, mv_ts(1 )%sf)
724732 else
725- call s_ibm_correct_state(q_cons_ts(1 )%vf, q_prim_vf)
733+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf)
726734 end if
727735 end if
728736
@@ -740,6 +748,7 @@ contains
740748 real (wp), intent (INOUT ) :: time_avg
741749
742750 integer :: i, j, k, l, q !< Generic loop iterator
751+ integer :: dest
743752
744753 real (wp) :: start, finish
745754
@@ -787,6 +796,8 @@ contains
787796 end do
788797 end do
789798 end do
799+
800+ dest = 1 ! result in q_cons_ts(1 )%vf
790801#else
791802 $:GPU_PARALLEL_LOOP(collapse= 4 )
792803 do i = 1 , sys_size
@@ -800,6 +811,8 @@ contains
800811 end do
801812 end do
802813 end do
814+
815+ dest = 2 ! result in q_cons_ts(2 )%vf
803816#endif
804817
805818 !Evolve pb and mv for non- polytropic qbmm
@@ -837,21 +850,21 @@ contains
837850 end do
838851 end if
839852
840- if (bodyForces) call s_apply_bodyforces(q_cons_ts(2 )%vf, q_prim_vf, rhs_vf, dt)
853+ if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest )%vf, q_prim_vf, rhs_vf, dt)
841854
842- if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(2 )%vf)
855+ if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(dest )%vf)
843856
844857 if (model_eqns == 3 .and. (.not. relax)) then
845- call s_pressure_relaxation_procedure(q_cons_ts(2 )%vf)
858+ call s_pressure_relaxation_procedure(q_cons_ts(dest )%vf)
846859 end if
847860
848- if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2 )%vf)
861+ if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest )%vf)
849862
850863 if (ib) then
851864 if (qbmm .and. .not. polytropic) then
852- call s_ibm_correct_state(q_cons_ts(2 )%vf, q_prim_vf, pb_ts(2 )%sf, mv_ts(2 )%sf)
865+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf, pb_ts(2 )%sf, mv_ts(2 )%sf)
853866 else
854- call s_ibm_correct_state(q_cons_ts(2 )%vf, q_prim_vf)
867+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf)
855868 end if
856869 end if
857870
@@ -878,6 +891,8 @@ contains
878891 end do
879892 end do
880893 end do
894+
895+ dest = 1 ! Result in q_cons_ts(1 )%vf
881896#else
882897 $:GPU_PARALLEL_LOOP(collapse= 4 )
883898 do i = 1 , sys_size
@@ -892,6 +907,8 @@ contains
892907 end do
893908 end do
894909 end do
910+
911+ dest = 2 ! Result in q_cons_ts(2 )%vf
895912#endif
896913
897914 if (qbmm .and. (.not. polytropic)) then
@@ -930,21 +947,21 @@ contains
930947 end do
931948 end if
932949
933- if (bodyForces) call s_apply_bodyforces(q_cons_ts(2 )%vf, q_prim_vf, rhs_vf, dt/ 4._wp )
950+ if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest )%vf, q_prim_vf, rhs_vf, dt/ 4._wp )
934951
935- if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(2 )%vf)
952+ if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(dest )%vf)
936953
937954 if (model_eqns == 3 .and. (.not. relax)) then
938- call s_pressure_relaxation_procedure(q_cons_ts(2 )%vf)
955+ call s_pressure_relaxation_procedure(q_cons_ts(dest )%vf)
939956 end if
940957
941- if (adv_n) call s_comp_alpha_from_n(q_cons_ts(2 )%vf)
958+ if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest )%vf)
942959
943960 if (ib) then
944961 if (qbmm .and. .not. polytropic) then
945- call s_ibm_correct_state(q_cons_ts(2 )%vf, q_prim_vf, pb_ts(2 )%sf, mv_ts(2 )%sf)
962+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf, pb_ts(2 )%sf, mv_ts(2 )%sf)
946963 else
947- call s_ibm_correct_state(q_cons_ts(2 )%vf, q_prim_vf)
964+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf)
948965 end if
949966 end if
950967
@@ -971,6 +988,8 @@ contains
971988 end do
972989 end do
973990 end do
991+
992+ dest = 1 ! Result in q_cons_ts(1 )%vf
974993#else
975994 $:GPU_PARALLEL_LOOP(collapse= 4 )
976995 do i = 1 , sys_size
@@ -985,6 +1004,8 @@ contains
9851004 end do
9861005 end do
9871006 end do
1007+
1008+ dest = 1 ! Result in q_cons_ts(2 )%vf
9881009#endif
9891010
9901011 if (qbmm .and. (.not. polytropic)) then
@@ -1023,25 +1044,25 @@ contains
10231044 end do
10241045 end if
10251046
1026- if (bodyForces) call s_apply_bodyforces(q_cons_ts(1 )%vf, q_prim_vf, rhs_vf, 2._wp * dt/ 3._wp )
1047+ if (bodyForces) call s_apply_bodyforces(q_cons_ts(dest )%vf, q_prim_vf, rhs_vf, 2._wp * dt/ 3._wp )
10271048
1028- if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(1 )%vf)
1049+ if (grid_geometry == 3 ) call s_apply_fourier_filter(q_cons_ts(dest )%vf)
10291050
10301051 if (model_eqns == 3 .and. (.not. relax)) then
1031- call s_pressure_relaxation_procedure(q_cons_ts(1 )%vf)
1052+ call s_pressure_relaxation_procedure(q_cons_ts(dest )%vf)
10321053 end if
10331054
10341055 call nvtxStartRange(" RHS-ELASTIC" )
1035- if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(1 )%vf, q_prim_vf)
1056+ if (hyperelasticity) call s_hyperelastic_rmt_stress_update(q_cons_ts(dest )%vf, q_prim_vf)
10361057 call nvtxEndRange
10371058
1038- if (adv_n) call s_comp_alpha_from_n(q_cons_ts(1 )%vf)
1059+ if (adv_n) call s_comp_alpha_from_n(q_cons_ts(dest )%vf)
10391060
10401061 if (ib) then
10411062 if (qbmm .and. .not. polytropic) then
1042- call s_ibm_correct_state(q_cons_ts(1 )%vf, q_prim_vf, pb_ts(1 )%sf, mv_ts(1 )%sf)
1063+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf, pb_ts(1 )%sf, mv_ts(1 )%sf)
10431064 else
1044- call s_ibm_correct_state(q_cons_ts(1 )%vf, q_prim_vf)
1065+ call s_ibm_correct_state(q_cons_ts(dest )%vf, q_prim_vf)
10451066 end if
10461067 end if
10471068
0 commit comments