@@ -95,16 +95,19 @@ contains
9595
9696 ! Allocating the cell-average conservative variables
9797 @:ALLOCATE(q_cons_ts(1 :num_ts))
98+ @:PREFER_GPU(q_cons_ts)
9899
99100 do i = 1 , num_ts
100101 @:ALLOCATE(q_cons_ts(i)%vf(1 :sys_size))
102+ @:PREFER_GPU(q_cons_ts(i)%vf)
101103 end do
102104
103105 do i = 1 , num_ts
104106 do j = 1 , sys_size
105107 @:ALLOCATE(q_cons_ts(i)%vf(j)%sf(idwbuff(1 )%beg:idwbuff(1 )%end, &
106108 idwbuff(2 )%beg:idwbuff(2 )%end, &
107109 idwbuff(3 )%beg:idwbuff(3 )%end))
110+ @:PREFER_GPU(q_cons_ts(i)%vf(j)%sf)
108111 end do
109112 @:ACC_SETUP_VFs(q_cons_ts(i))
110113 end do
@@ -304,11 +307,13 @@ contains
304307
305308 ! Allocating the cell-average RHS variables
306309 @:ALLOCATE(rhs_vf(1 :sys_size))
310+ @:PREFER_GPU(rhs_vf)
307311
308312 if (igr) then
309313 do i = 1 , sys_size
310314 @:ALLOCATE(rhs_vf(i)%sf(- 1 :m+1 ,- 1 :n+1 ,- 1 :p+1 ))
311315 @:ACC_SETUP_SFs(rhs_vf(i))
316+ @:PREFER_GPU(rhs_vf(i)%sf)
312317 end do
313318 else
314319 do i = 1 , sys_size
@@ -650,6 +655,7 @@ contains
650655 real (wp), intent (INOUT ) :: time_avg
651656
652657 integer :: i, j, k, l, q !< Generic loop iterator
658+ integer :: dest
653659
654660 real (wp) :: start, finish
655661
@@ -682,6 +688,7 @@ contains
682688
683689 if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage= 1 )
684690
691+ #if !defined(__NVCOMPILER_GPU_UNIFIED_MEM)
685692 $:GPU_PARALLEL_LOOP(collapse= 4 )
686693 do i = 1 , sys_size
687694 do l = 0 , p
@@ -694,6 +701,24 @@ contains
694701 end do
695702 end do
696703 end do
704+ dest = 2 ! result in q_cons_ts(2 )%vf
705+ #else
706+ $:GPU_PARALLEL_LOOP(collapse= 4 )
707+ do i = 1 , sys_size
708+ do l = 0 , p
709+ do k = 0 , n
710+ do j = 0 , m
711+ q_cons_ts(2 )%vf(i)%sf(j, k, l) = &
712+ q_cons_ts(1 )%vf(i)%sf(j, k, l)
713+ q_cons_ts(1 )%vf(i)%sf(j, k, l) = &
714+ q_cons_ts(1 )%vf(i)%sf(j, k, l) &
715+ + dt* rhs_vf(i)%sf(j, k, l)
716+ end do
717+ end do
718+ end do
719+ end do
720+ dest = 1 ! result in q_cons_ts(1 )%vf
721+ #endif
697722
698723 !Evolve pb and mv for non-polytropic qbmm
699724 if (qbmm .and. (.not. polytropic)) then
@@ -750,10 +775,11 @@ contains
750775
751776 ! Stage 2 of 3
752777
753- call s_compute_rhs(q_cons_ts(2 )%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2 )%sf, rhs_pb, mv_ts(2 )%sf, rhs_mv, t_step, time_avg, 2 )
778+ call s_compute_rhs(q_cons_ts(dest )%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2 )%sf, rhs_pb, mv_ts(2 )%sf, rhs_mv, t_step, time_avg, 2 )
754779
755780 if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage= 2 )
756781
782+ #if !defined(__NVCOMPILER_GPU_UNIFIED_MEM)
757783 $:GPU_PARALLEL_LOOP(collapse= 4 )
758784 do i = 1 , sys_size
759785 do l = 0 , p
@@ -767,6 +793,23 @@ contains
767793 end do
768794 end do
769795 end do
796+ dest = 2 ! result in q_cons_ts(2 )%vf
797+ #else
798+ $:GPU_PARALLEL_LOOP(collapse= 4 )
799+ do i = 1 , sys_size
800+ do l = 0 , p
801+ do k = 0 , n
802+ do j = 0 , m
803+ q_cons_ts(1 )%vf(i)%sf(j, k, l) = &
804+ (3._wp * q_cons_ts(2 )%vf(i)%sf(j, k, l) &
805+ + q_cons_ts(1 )%vf(i)%sf(j, k, l) &
806+ + dt* rhs_vf(i)%sf(j, k, l))/ 4._wp
807+ end do
808+ end do
809+ end do
810+ end do
811+ dest = 1 ! result in q_cons_ts(1 )%vf
812+ #endif
770813
771814 if (qbmm .and. (.not. polytropic)) then
772815 $:GPU_PARALLEL_LOOP(collapse= 5 )
@@ -823,10 +866,11 @@ contains
823866 end if
824867
825868 ! Stage 3 of 3
826- call s_compute_rhs(q_cons_ts(2 )%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2 )%sf, rhs_pb, mv_ts(2 )%sf, rhs_mv, t_step, time_avg, 3 )
869+ call s_compute_rhs(q_cons_ts(dest )%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2 )%sf, rhs_pb, mv_ts(2 )%sf, rhs_mv, t_step, time_avg, 3 )
827870
828871 if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage= 3 )
829872
873+ #if !defined(__NVCOMPILER_GPU_UNIFIED_MEM)
830874 $:GPU_PARALLEL_LOOP(collapse= 4 )
831875 do i = 1 , sys_size
832876 do l = 0 , p
@@ -840,6 +884,23 @@ contains
840884 end do
841885 end do
842886 end do
887+ dest = 1 ! result in q_cons_ts(1 )%vf
888+ #else
889+ $:GPU_PARALLEL_LOOP(collapse= 4 )
890+ do i = 1 , sys_size
891+ do l = 0 , p
892+ do k = 0 , n
893+ do j = 0 , m
894+ q_cons_ts(1 )%vf(i)%sf(j, k, l) = &
895+ (q_cons_ts(2 )%vf(i)%sf(j, k, l) &
896+ + 2._wp * q_cons_ts(1 )%vf(i)%sf(j, k, l) &
897+ + 2._wp * dt* rhs_vf(i)%sf(j, k, l))/ 3._wp
898+ end do
899+ end do
900+ end do
901+ end do
902+ dest = 1 ! result in q_cons_ts(1 )%vf
903+ #endif
843904
844905 if (qbmm .and. (.not. polytropic)) then
845906 $:GPU_PARALLEL_LOOP(collapse= 5 )
0 commit comments