@@ -1163,9 +1163,7 @@ contains
11631163 if (n > 0) then
11641164
11651165 if (p > 0) then
1166-
1167- #ifdef MFC_POST_PROCESS
1168- if (fft_wrt .and. (.not. file_per_process)) then
1166+ if (fft_wrt) then
11691167
11701168 ! Initial estimate of optimal processor topology
11711169 num_procs_x = 1
@@ -1207,182 +1205,121 @@ contains
12071205 end if
12081206
12091207 end do
1210-
12111208 else
1212- ! Initial estimate of optimal processor topology
1213- num_procs_x = 1
1214- num_procs_y = 1
1215- num_procs_z = num_procs
1216- ierr = -1
12171209
1218- ! Benchmarking the quality of this initial guess
1219- tmp_num_procs_x = num_procs_x
1220- tmp_num_procs_y = num_procs_y
1221- tmp_num_procs_z = num_procs_z
1222- fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
1223- - (n + 1)/tmp_num_procs_y) &
1224- + 10._wp*abs((n + 1)/tmp_num_procs_y &
1225- - (p + 1)/tmp_num_procs_z)
1210+ if (cyl_coord .and. p > 0) then
1211+ ! Implement pencil processor blocking if using cylindrical coordinates so
1212+ ! that all cells in azimuthal direction are stored on a single processor.
1213+ ! This is necessary for efficient application of Fourier filter near axis.
12261214
1227- ! Optimization of the initial processor topology
1228- do i = 1, num_procs
1215+ ! Initial values of the processor factorization optimization
1216+ num_procs_x = 1
1217+ num_procs_y = num_procs
1218+ num_procs_z = 1
1219+ ierr = -1
12291220
1230- if (mod(num_procs, i) == 0 &
1231- .and. &
1232- (m + 1)/i >= num_stcls_min*recon_order) then
1221+ ! Computing minimization variable for these initial values
1222+ tmp_num_procs_x = num_procs_x
1223+ tmp_num_procs_y = num_procs_y
1224+ tmp_num_procs_z = num_procs_z
1225+ fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
1226+ - (n + 1)/tmp_num_procs_y)
12331227
1234- do j = 1, num_procs/i
1228+ ! Searching for optimal computational domain distribution
1229+ do i = 1, num_procs
12351230
1236- if (mod(num_procs/i, j ) == 0 &
1237- .and. &
1238- (n + 1)/j >= num_stcls_min*recon_order) then
1231+ if (mod(num_procs, i ) == 0 &
1232+ .and. &
1233+ (m + 1)/i >= num_stcls_min*recon_order) then
12391234
1240- tmp_num_procs_x = i
1241- tmp_num_procs_y = j
1242- tmp_num_procs_z = num_procs/(i*j)
1235+ tmp_num_procs_x = i
1236+ tmp_num_procs_y = num_procs/i
12431237
1244- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
1245- - (n + 1)/tmp_num_procs_y) &
1246- + abs((n + 1)/tmp_num_procs_y &
1247- - (p + 1)/tmp_num_procs_z) &
1248- .and. &
1249- (p + 1)/tmp_num_procs_z &
1250- >= &
1251- num_stcls_min*recon_order) &
1252- then
1253-
1254- num_procs_x = i
1255- num_procs_y = j
1256- num_procs_z = num_procs/(i*j)
1257- fct_min = abs((m + 1)/tmp_num_procs_x &
1258- - (n + 1)/tmp_num_procs_y) &
1259- + abs((n + 1)/tmp_num_procs_y &
1260- - (p + 1)/tmp_num_procs_z)
1261- ierr = 0
1238+ if (fct_min >= abs((m + 1)/tmp_num_procs_x &
1239+ - (n + 1)/tmp_num_procs_y) &
1240+ .and. &
1241+ (n + 1)/tmp_num_procs_y &
1242+ >= &
1243+ num_stcls_min*recon_order) then
12621244
1263- end if
1245+ num_procs_x = i
1246+ num_procs_y = num_procs/i
1247+ fct_min = abs((m + 1)/tmp_num_procs_x &
1248+ - (n + 1)/tmp_num_procs_y)
1249+ ierr = 0
12641250
12651251 end if
12661252
1267- end do
1268-
1269- end if
1270-
1271- end do
1272- end if
1273- #else
1274- if (cyl_coord .and. p > 0) then
1275- ! Implement pencil processor blocking if using cylindrical coordinates so
1276- ! that all cells in azimuthal direction are stored on a single processor.
1277- ! This is necessary for efficient application of Fourier filter near axis.
1278-
1279- ! Initial values of the processor factorization optimization
1280- num_procs_x = 1
1281- num_procs_y = num_procs
1282- num_procs_z = 1
1283- ierr = -1
1284-
1285- ! Computing minimization variable for these initial values
1286- tmp_num_procs_x = num_procs_x
1287- tmp_num_procs_y = num_procs_y
1288- tmp_num_procs_z = num_procs_z
1289- fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
1290- - (n + 1)/tmp_num_procs_y)
1291-
1292- ! Searching for optimal computational domain distribution
1293- do i = 1, num_procs
1294-
1295- if (mod(num_procs, i) == 0 &
1296- .and. &
1297- (m + 1)/i >= num_stcls_min*recon_order) then
1298-
1299- tmp_num_procs_x = i
1300- tmp_num_procs_y = num_procs/i
1301-
1302- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
1303- - (n + 1)/tmp_num_procs_y) &
1304- .and. &
1305- (n + 1)/tmp_num_procs_y &
1306- >= &
1307- num_stcls_min*recon_order) then
1308-
1309- num_procs_x = i
1310- num_procs_y = num_procs/i
1311- fct_min = abs((m + 1)/tmp_num_procs_x &
1312- - (n + 1)/tmp_num_procs_y)
1313- ierr = 0
1314-
13151253 end if
13161254
1317- end if
1318-
1319- end do
1320-
1321- else
1322-
1323- ! Initial estimate of optimal processor topology
1324- num_procs_x = 1
1325- num_procs_y = 1
1326- num_procs_z = num_procs
1327- ierr = -1
1328-
1329- ! Benchmarking the quality of this initial guess
1330- tmp_num_procs_x = num_procs_x
1331- tmp_num_procs_y = num_procs_y
1332- tmp_num_procs_z = num_procs_z
1333- fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
1334- - (n + 1)/tmp_num_procs_y) &
1335- + 10._wp*abs((n + 1)/tmp_num_procs_y &
1336- - (p + 1)/tmp_num_procs_z)
1337-
1338- ! Optimization of the initial processor topology
1339- do i = 1, num_procs
1340-
1341- if (mod(num_procs, i) == 0 &
1342- .and. &
1343- (m + 1)/i >= num_stcls_min*recon_order) then
1255+ end do
13441256
1345- do j = 1, num_procs/i
1257+ else
13461258
1347- if (mod(num_procs/i, j) == 0 &
1348- .and. &
1349- (n + 1)/j >= num_stcls_min*recon_order) then
1259+ ! Initial estimate of optimal processor topology
1260+ num_procs_x = 1
1261+ num_procs_y = 1
1262+ num_procs_z = num_procs
1263+ ierr = -1
1264+
1265+ ! Benchmarking the quality of this initial guess
1266+ tmp_num_procs_x = num_procs_x
1267+ tmp_num_procs_y = num_procs_y
1268+ tmp_num_procs_z = num_procs_z
1269+ fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x &
1270+ - (n + 1)/tmp_num_procs_y) &
1271+ + 10._wp*abs((n + 1)/tmp_num_procs_y &
1272+ - (p + 1)/tmp_num_procs_z)
1273+
1274+ ! Optimization of the initial processor topology
1275+ do i = 1, num_procs
1276+
1277+ if (mod(num_procs, i) == 0 &
1278+ .and. &
1279+ (m + 1)/i >= num_stcls_min*recon_order) then
13501280
1351- tmp_num_procs_x = i
1352- tmp_num_procs_y = j
1353- tmp_num_procs_z = num_procs/(i*j)
1281+ do j = 1, num_procs/i
13541282
1355- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
1356- - (n + 1)/tmp_num_procs_y) &
1357- + abs((n + 1)/tmp_num_procs_y &
1358- - (p + 1)/tmp_num_procs_z) &
1283+ if (mod(num_procs/i, j) == 0 &
13591284 .and. &
1360- (p + 1)/tmp_num_procs_z &
1361- >= &
1362- num_stcls_min*recon_order) &
1363- then
1364-
1365- num_procs_x = i
1366- num_procs_y = j
1367- num_procs_z = num_procs/(i*j)
1368- fct_min = abs((m + 1)/tmp_num_procs_x &
1369- - (n + 1)/tmp_num_procs_y) &
1370- + abs((n + 1)/tmp_num_procs_y &
1371- - (p + 1)/tmp_num_procs_z)
1372- ierr = 0
1285+ (n + 1)/j >= num_stcls_min*recon_order) then
1286+
1287+ tmp_num_procs_x = i
1288+ tmp_num_procs_y = j
1289+ tmp_num_procs_z = num_procs/(i*j)
1290+
1291+ if (fct_min >= abs((m + 1)/tmp_num_procs_x &
1292+ - (n + 1)/tmp_num_procs_y) &
1293+ + abs((n + 1)/tmp_num_procs_y &
1294+ - (p + 1)/tmp_num_procs_z) &
1295+ .and. &
1296+ (p + 1)/tmp_num_procs_z &
1297+ >= &
1298+ num_stcls_min*recon_order) &
1299+ then
1300+
1301+ num_procs_x = i
1302+ num_procs_y = j
1303+ num_procs_z = num_procs/(i*j)
1304+ fct_min = abs((m + 1)/tmp_num_procs_x &
1305+ - (n + 1)/tmp_num_procs_y) &
1306+ + abs((n + 1)/tmp_num_procs_y &
1307+ - (p + 1)/tmp_num_procs_z)
1308+ ierr = 0
1309+
1310+ end if
13731311
13741312 end if
13751313
1376- end if
1377-
1378- end do
1314+ end do
13791315
1380- end if
1316+ end if
13811317
1382- end do
1318+ end do
13831319
1320+ end if
13841321 end if
1385- #endif
1322+
13861323 ! Verifying that a valid decomposition of the computational
13871324 ! domain has been established. If not, the simulation exits.
13881325 if (proc_rank == 0 .and. ierr == -1) then
0 commit comments