@@ -75,8 +75,14 @@ module m_time_steppers
7575 integer , private :: num_ts !<
7676 !! Number of time stages in the time-stepping scheme
7777
78+ integer , private :: out_of_core
79+
7880 $:GPU_DECLARE(create= ' [q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts,rhs_mv,rhs_pb,max_dt]' )
7981
82+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
83+ real (wp), allocatable, dimension (:, :, :, :), pinned, target :: q_cons_ts_pool_host
84+ #endif
85+
8086contains
8187
8288 !> The computation of parameters, the allocation of memory,
@@ -86,6 +92,21 @@ contains
8692
8793 integer :: i, j !< Generic loop iterators
8894
95+ character (len= 10 ) :: out_of_core_str
96+ out_of_core = 0
97+
98+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
99+ call get_environment_variable(" MFC_OUT_OF_CORE" , out_of_core_str)
100+
101+ if (trim (out_of_core_str) == " 0" ) then
102+ out_of_core = 0
103+ elseif (trim (out_of_core_str) == " 1" ) then
104+ out_of_core = 1
105+ else ! default
106+ out_of_core = 0
107+ endif
108+ #endif
109+
89110 ! Setting number of time-stages for selected time-stepping scheme
90111 if (time_stepper == 1 ) then
91112 num_ts = 1
@@ -102,12 +123,33 @@ contains
102123 @:PREFER_GPU(q_cons_ts(i)%vf)
103124 end do
104125
126+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
127+ if ( out_of_core == 1 ) then
128+ allocate(q_cons_ts_pool_host(idwbuff(1 )%beg:idwbuff(1 )%end, &
129+ idwbuff(2 )%beg:idwbuff(2 )%end, &
130+ idwbuff(3 )%beg:idwbuff(3 )%end, &
131+ 1 :sys_size))
132+ end if
133+ #endif
134+
105135 do i = 1 , num_ts
106136 do j = 1 , sys_size
107- @:ALLOCATE(q_cons_ts(i)%vf(j)%sf(idwbuff(1 )%beg:idwbuff(1 )%end, &
108- idwbuff(2 )%beg:idwbuff(2 )%end, &
109- idwbuff(3 )%beg:idwbuff(3 )%end))
110- @:PREFER_GPU(q_cons_ts(i)%vf(j)%sf)
137+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
138+ if ( i <= (num_ts - out_of_core) ) then
139+ !print * , " q_cons_ts" , i, j, " on GPU"
140+ #endif
141+ @:ALLOCATE(q_cons_ts(i)%vf(j)%sf(idwbuff(1 )%beg:idwbuff(1 )%end, &
142+ idwbuff(2 )%beg:idwbuff(2 )%end, &
143+ idwbuff(3 )%beg:idwbuff(3 )%end))
144+ @:PREFER_GPU(q_cons_ts(i)%vf(j)%sf)
145+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
146+ else
147+ !print * , " q_cons_ts" , i, j, " on CPU"
148+ q_cons_ts(i)%vf(j)%sf(idwbuff(1 )%beg:idwbuff(1 )%end, &
149+ idwbuff(2 )%beg:idwbuff(2 )%end, &
150+ idwbuff(3 )%beg:idwbuff(3 )%end) = > q_cons_ts_pool_host(:,:,:,j)
151+ end if
152+ #endif
111153 end do
112154 @:ACC_SETUP_VFs(q_cons_ts(i))
113155 end do
@@ -1205,14 +1247,30 @@ contains
12051247 ! Deallocating the cell-average conservative variables
12061248 do i = 1 , num_ts
12071249 do j = 1 , sys_size
1208- @:DEALLOCATE(q_cons_ts(i)%vf(j)%sf)
1250+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
1251+ if ( i <= (num_ts - out_of_core) ) then
1252+ !print * , " q_cons_ts" , i, j, " dealloc"
1253+ #endif
1254+ @:DEALLOCATE(q_cons_ts(i)%vf(j)%sf)
1255+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
1256+ else
1257+ !print * , " q_cons_ts" , i, j, " nullify"
1258+ nullify(q_cons_ts(i)%vf(j)%sf)
1259+ end if
1260+ #endif
12091261 end do
12101262
12111263 @:DEALLOCATE(q_cons_ts(i)%vf)
12121264 end do
12131265
12141266 @:DEALLOCATE(q_cons_ts)
12151267
1268+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
1269+ if ( out_of_core == 1 ) then
1270+ deallocate(q_cons_ts_pool_host)
1271+ end if
1272+ #endif
1273+
12161274 ! Deallocating the cell-average primitive ts variables
12171275 if (probe_wrt) then
12181276 do i = 0 , 3
0 commit comments