Skip to content

Commit a7c8e14

Browse files
author
Anand
committed
Compiles, need to test
1 parent 0a68648 commit a7c8e14

File tree

6 files changed

+461
-5
lines changed

6 files changed

+461
-5
lines changed

src/common/m_mpi_common.fpp

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,113 @@ contains
11641164
11651165
if (p > 0) then
11661166
1167+
#ifdef MFC_POST_PROCESS
1168+
if(fft_wrt .and. (.not. file_per_process)) then
1169+
1170+
! Initial estimate of optimal processor topology
1171+
num_procs_x = 1
1172+
num_procs_y = 1
1173+
num_procs_z = num_procs
1174+
ierr = -1
1175+
1176+
! Benchmarking the quality of this initial guess
1177+
tmp_num_procs_y = num_procs_y
1178+
tmp_num_procs_z = num_procs_z
1179+
fct_min = 10._wp*abs((n + 1)/tmp_num_procs_y &
1180+
- (p + 1)/tmp_num_procs_z)
1181+
1182+
! Optimization of the initial processor topology
1183+
do i = 1, num_procs
1184+
1185+
if (mod(num_procs, i) == 0 &
1186+
.and. &
1187+
(n + 1)/i >= num_stcls_min*recon_order) then
1188+
1189+
tmp_num_procs_y = i
1190+
tmp_num_procs_z = num_procs/i
1191+
1192+
if (fct_min >= abs((n + 1)/tmp_num_procs_y &
1193+
- (p + 1)/tmp_num_procs_z) &
1194+
.and. &
1195+
(p + 1)/tmp_num_procs_z &
1196+
>= &
1197+
num_stcls_min*recon_order) then
1198+
1199+
num_procs_y = i
1200+
num_procs_z = num_procs/i
1201+
fct_min = abs((n + 1)/tmp_num_procs_y &
1202+
- (p + 1)/tmp_num_procs_z)
1203+
ierr = 0
1204+
1205+
end if
1206+
1207+
end if
1208+
1209+
end do
1210+
1211+
else
1212+
! Initial estimate of optimal processor topology
1213+
num_procs_x = 1
1214+
num_procs_y = 1
1215+
num_procs_z = num_procs
1216+
ierr = -1
1217+
1218+
! Benchmarking the quality of this initial guess
1219+
tmp_num_procs_x = num_procs_x
1220+
tmp_num_procs_y = num_procs_y
1221+
tmp_num_procs_z = num_procs_z
1222+
fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
1223+
- (n + 1)/tmp_num_procs_y) &
1224+
+ 10._wp*abs((n + 1)/tmp_num_procs_y &
1225+
- (p + 1)/tmp_num_procs_z)
1226+
1227+
! Optimization of the initial processor topology
1228+
do i = 1, num_procs
1229+
1230+
if (mod(num_procs, i) == 0 &
1231+
.and. &
1232+
(m + 1)/i >= num_stcls_min*recon_order) then
1233+
1234+
do j = 1, num_procs/i
1235+
1236+
if (mod(num_procs/i, j) == 0 &
1237+
.and. &
1238+
(n + 1)/j >= num_stcls_min*recon_order) then
1239+
1240+
tmp_num_procs_x = i
1241+
tmp_num_procs_y = j
1242+
tmp_num_procs_z = num_procs/(i*j)
1243+
1244+
if (fct_min >= abs((m + 1)/tmp_num_procs_x &
1245+
- (n + 1)/tmp_num_procs_y) &
1246+
+ abs((n + 1)/tmp_num_procs_y &
1247+
- (p + 1)/tmp_num_procs_z) &
1248+
.and. &
1249+
(p + 1)/tmp_num_procs_z &
1250+
>= &
1251+
num_stcls_min*recon_order) &
1252+
then
1253+
1254+
num_procs_x = i
1255+
num_procs_y = j
1256+
num_procs_z = num_procs/(i*j)
1257+
fct_min = abs((m + 1)/tmp_num_procs_x &
1258+
- (n + 1)/tmp_num_procs_y) &
1259+
+ abs((n + 1)/tmp_num_procs_y &
1260+
- (p + 1)/tmp_num_procs_z)
1261+
ierr = 0
1262+
1263+
end if
1264+
1265+
end if
1266+
1267+
end do
1268+
1269+
end if
1270+
1271+
end do
1272+
end if
1273+
#else
11671274
if (cyl_coord .and. p > 0) then
11681275
! Implement pencil processor blocking if using cylindrical coordinates so
11691276
! that all cells in azimuthal direction are stored on a single processor.
@@ -1275,7 +1382,7 @@ contains
12751382
end do
12761383
12771384
end if
1278-
1385+
#endif
12791386
! Verifying that a valid decomposition of the computational
12801387
! domain has been established. If not, the simulation exits.
12811388
if (proc_rank == 0 .and. ierr == -1) then

src/post_process/m_checker.fpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ contains
3232
call s_check_inputs_flux_limiter
3333
call s_check_inputs_volume_fraction
3434
call s_check_inputs_vorticity
35+
call s_check_inputs_fft
3536
call s_check_inputs_qm
3637
call s_check_inputs_liutex
3738
call s_check_inputs_schlieren
@@ -111,6 +112,13 @@ contains
111112
@:PROHIBIT(any(omega_wrt) .and. fd_order == dflt_int, "fd_order must be set for omega_wrt")
112113
end subroutine s_check_inputs_vorticity
113114

115+
!> Checks constraints on fft_wrt.
!! Each PROHIBIT line names an input combination that must not occur,
!! together with the message associated with it.
impure subroutine s_check_inputs_fft
    ! fft_wrt needs file_per_process switched off: the processor-topology
    ! code selects its FFT-specific decomposition only when
    ! fft_wrt .and. (.not. file_per_process) holds. The invalid combination
    ! is therefore fft_wrt together with file_per_process (the previous
    ! condition was inverted and rejected the supported configuration).
    @:PROHIBIT(fft_wrt .and. file_per_process, "Turn off file_per_process with fft_wrt")
    ! Both n and p must be non-zero, i.e. the case must be three-dimensional.
    @:PROHIBIT(fft_wrt .and. (n == 0 .or. p == 0), "FFT WRT only in 3D")
    ! m+1, n+1 and p+1 must all be even.
    ! NOTE(review): the message says "local" dimensions; confirm whether
    ! m, n, p are global or per-rank values when this check runs.
    @:PROHIBIT(fft_wrt .and. (MOD(m+1,2) == 1 .or. MOD(n+1,2) == 1 .or. MOD(p+1,2) == 1), "FFT WRT requires local dimensions divisible by 2")
end subroutine s_check_inputs_fft
121+
114122
!> Checks constraints on Q-criterion parameters
115123
impure subroutine s_check_inputs_qm
116124
@:PROHIBIT(n == 0 .and. qm_wrt)

src/post_process/m_global_parameters.fpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ module m_global_parameters
241241
integer :: flux_lim
242242
logical, dimension(3) :: flux_wrt
243243
logical :: E_wrt
244+
logical :: fft_wrt
244245
logical :: pres_wrt
245246
logical, dimension(num_fluids_max) :: alpha_wrt
246247
logical :: gamma_wrt
@@ -422,6 +423,7 @@ contains
422423
parallel_io = .false.
423424
file_per_process = .false.
424425
E_wrt = .false.
426+
fft_wrt = .false.
425427
pres_wrt = .false.
426428
alpha_wrt = .false.
427429
gamma_wrt = .false.

src/post_process/m_mpi_proxy.fpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ contains
105105
& 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', &
106106
& 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', &
107107
& 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', &
108-
& 'down_sample' ]
108+
& 'down_sample','fft_wrt' ]
109109
110110
call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
111111
#:endfor

0 commit comments

Comments
 (0)