@@ -1164,6 +1164,113 @@ contains
11641164
11651165 if (p > 0) then
11661166
1167+ #ifdef MFC_POST_PROCESS
1168+ if(fft_wrt .and. (.not. file_per_process)) then
1169+
1170+ ! Initial estimate of optimal processor topology
1171+ num_procs_x = 1
1172+ num_procs_y = 1
1173+ num_procs_z = num_procs
1174+ ierr = -1
1175+
1176+ ! Benchmarking the quality of this initial guess
1177+ tmp_num_procs_y = num_procs_y
1178+ tmp_num_procs_z = num_procs_z
1179+ fct_min = 10._wp*abs((n + 1)/tmp_num_procs_y &
1180+ - (p + 1)/tmp_num_procs_z)
1181+
1182+ ! Optimization of the initial processor topology
1183+ do i = 1, num_procs
1184+
1185+ if (mod(num_procs, i) == 0 &
1186+ .and. &
1187+ (n + 1)/i >= num_stcls_min*recon_order) then
1188+
1189+ tmp_num_procs_y = i
1190+ tmp_num_procs_z = num_procs/i
1191+
1192+ if (fct_min >= abs((n + 1)/tmp_num_procs_y &
1193+ - (p + 1)/tmp_num_procs_z) &
1194+ .and. &
1195+ (p + 1)/tmp_num_procs_z &
1196+ >= &
1197+ num_stcls_min*recon_order) then
1198+
1199+ num_procs_y = i
1200+ num_procs_z = num_procs/i
1201+ fct_min = abs((n + 1)/tmp_num_procs_y &
1202+ - (p + 1)/tmp_num_procs_z)
1203+ ierr = 0
1204+
1205+ end if
1206+
1207+ end if
1208+
1209+ end do
1210+
1211+ else
1212+ ! Initial estimate of optimal processor topology
1213+ num_procs_x = 1
1214+ num_procs_y = 1
1215+ num_procs_z = num_procs
1216+ ierr = -1
1217+
1218+ ! Benchmarking the quality of this initial guess
1219+ tmp_num_procs_x = num_procs_x
1220+ tmp_num_procs_y = num_procs_y
1221+ tmp_num_procs_z = num_procs_z
1222+ fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
1223+ - (n + 1)/tmp_num_procs_y) &
1224+ + 10._wp*abs((n + 1)/tmp_num_procs_y &
1225+ - (p + 1)/tmp_num_procs_z)
1226+
1227+ ! Optimization of the initial processor topology
1228+ do i = 1, num_procs
1229+
1230+ if (mod(num_procs, i) == 0 &
1231+ .and. &
1232+ (m + 1)/i >= num_stcls_min*recon_order) then
1233+
1234+ do j = 1, num_procs/i
1235+
1236+ if (mod(num_procs/i, j) == 0 &
1237+ .and. &
1238+ (n + 1)/j >= num_stcls_min*recon_order) then
1239+
1240+ tmp_num_procs_x = i
1241+ tmp_num_procs_y = j
1242+ tmp_num_procs_z = num_procs/(i*j)
1243+
1244+ if (fct_min >= abs((m + 1)/tmp_num_procs_x &
1245+ - (n + 1)/tmp_num_procs_y) &
1246+ + abs((n + 1)/tmp_num_procs_y &
1247+ - (p + 1)/tmp_num_procs_z) &
1248+ .and. &
1249+ (p + 1)/tmp_num_procs_z &
1250+ >= &
1251+ num_stcls_min*recon_order) &
1252+ then
1253+
1254+ num_procs_x = i
1255+ num_procs_y = j
1256+ num_procs_z = num_procs/(i*j)
1257+ fct_min = abs((m + 1)/tmp_num_procs_x &
1258+ - (n + 1)/tmp_num_procs_y) &
1259+ + abs((n + 1)/tmp_num_procs_y &
1260+ - (p + 1)/tmp_num_procs_z)
1261+ ierr = 0
1262+
1263+ end if
1264+
1265+ end if
1266+
1267+ end do
1268+
1269+ end if
1270+
1271+ end do
1272+ end if
1273+ #else
11671274 if (cyl_coord .and. p > 0) then
11681275 ! Implement pencil processor blocking if using cylindrical coordinates so
11691276 ! that all cells in azimuthal direction are stored on a single processor.
@@ -1275,7 +1382,7 @@ contains
12751382 end do
12761383
12771384 end if
1278-
1385+ #endif
12791386 ! Verifying that a valid decomposition of the computational
12801387 ! domain has been established. If not, the simulation exits.
12811388 if (proc_rank == 0 .and. ierr == -1) then
0 commit comments