@@ -1174,117 +1174,161 @@ contains
11741174        if (n > 0) then 
11751175
11761176            if (p > 0) then 
1177+                 if (fft_wrt) then 
11771178
1178-                 if (cyl_coord .and. p > 0) then 
1179-                     ! Implement pencil processor blocking if using cylindrical coordinates so 
1180-                     ! that all cells in azimuthal direction are stored on a single processor. 
1181-                     ! This is necessary for efficient application of Fourier filter near axis. 
1182- 
1183-                     ! Initial values of the processor factorization optimization 
1179+                     ! Initial estimate of optimal processor topology 
11841180                    num_procs_x = 1 
1185-                     num_procs_y = num_procs  
1186-                     num_procs_z = 1  
1181+                     num_procs_y = 1  
1182+                     num_procs_z = num_procs  
11871183                    ierr = -1 
11881184
1189-                     ! Computing minimization variable for these initial values 
1190-                     tmp_num_procs_x = num_procs_x 
1185+                     ! Benchmarking the quality of this initial guess 
11911186                    tmp_num_procs_y = num_procs_y 
11921187                    tmp_num_procs_z = num_procs_z 
1193-                     fct_min = 10._wp*abs((m  + 1)/tmp_num_procs_x  & 
1194-                                          - (n  + 1)/tmp_num_procs_y ) 
1188+                     fct_min = 10._wp*abs((n  + 1)/tmp_num_procs_y  & 
1189+                                          - (p  + 1)/tmp_num_procs_z ) 
11951190
1196-                     ! Searching for optimal computational domain distribution  
1191+                     ! Optimization of the initial processor topology  
11971192                    do i = 1, num_procs 
11981193
11991194                        if (mod(num_procs, i) == 0 & 
12001195                            .and. & 
1201-                             (m  + 1)/i >= num_stcls_min*recon_order) then 
1196+                             (n  + 1)/i >= num_stcls_min*recon_order) then 
12021197
1203-                             tmp_num_procs_x  = i 
1204-                             tmp_num_procs_y  = num_procs/i 
1198+                             tmp_num_procs_y  = i 
1199+                             tmp_num_procs_z  = num_procs/i 
12051200
1206-                             if (fct_min >= abs((m  + 1)/tmp_num_procs_x  & 
1207-                                                - (n  + 1)/tmp_num_procs_y ) & 
1201+                             if (fct_min >= abs((n  + 1)/tmp_num_procs_y  & 
1202+                                                - (p  + 1)/tmp_num_procs_z ) & 
12081203                                .and. & 
1209-                                 (n  + 1)/tmp_num_procs_y  & 
1204+                                 (p  + 1)/tmp_num_procs_z  & 
12101205                                >= & 
12111206                                num_stcls_min*recon_order) then 
12121207
1213-                                 num_procs_x  = i 
1214-                                 num_procs_y  = num_procs/i 
1215-                                 fct_min = abs((m  + 1)/tmp_num_procs_x  & 
1216-                                               - (n  + 1)/tmp_num_procs_y ) 
1208+                                 num_procs_y  = i 
1209+                                 num_procs_z  = num_procs/i 
1210+                                 fct_min = abs((n  + 1)/tmp_num_procs_y  & 
1211+                                               - (p  + 1)/tmp_num_procs_z ) 
12171212                                ierr = 0 
12181213
12191214                            end if 
12201215
12211216                        end if 
12221217
12231218                    end do 
1224- 
12251219                else 
12261220
1227-                     ! Initial estimate of optimal processor topology 
1228-                     num_procs_x = 1 
1229-                     num_procs_y = 1 
1230-                     num_procs_z = num_procs 
1231-                     ierr = -1 
1221+                     if (cyl_coord .and. p > 0) then 
1222+                         ! Implement pencil processor blocking if using cylindrical coordinates so 
1223+                         ! that all cells in azimuthal direction are stored on a single processor. 
1224+                         ! This is necessary for efficient application of Fourier filter near axis. 
12321225
1233-                     ! Benchmarking the quality of this initial guess 
1234-                     tmp_num_procs_x = num_procs_x 
1235-                     tmp_num_procs_y = num_procs_y 
1236-                     tmp_num_procs_z = num_procs_z 
1237-                     fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & 
1238-                                          - (n + 1)/tmp_num_procs_y) & 
1239-                               + 10._wp*abs((n + 1)/tmp_num_procs_y & 
1240-                                            - (p + 1)/tmp_num_procs_z) 
1226+                         ! Initial values of the processor factorization optimization 
1227+                         num_procs_x = 1 
1228+                         num_procs_y = num_procs 
1229+                         num_procs_z = 1 
1230+                         ierr = -1 
12411231
1242-                     ! Optimization of the initial processor topology 
1243-                     do i = 1, num_procs 
1232+                         ! Computing minimization variable for these initial values 
1233+                         tmp_num_procs_x = num_procs_x 
1234+                         tmp_num_procs_y = num_procs_y 
1235+                         tmp_num_procs_z = num_procs_z 
1236+                         fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & 
1237+                                              - (n + 1)/tmp_num_procs_y) 
12441238
1245-                         if (mod(num_procs, i) == 0 & 
1246-                             .and. & 
1247-                             (m + 1)/i >= num_stcls_min*recon_order) then 
1239+                         ! Searching for optimal computational domain distribution 
1240+                         do i = 1, num_procs 
1241+ 
1242+                             if (mod(num_procs, i) == 0 & 
1243+                                 .and. & 
1244+                                 (m + 1)/i >= num_stcls_min*recon_order) then 
12481245
1249-                             do j = 1, num_procs/i 
1246+                                 tmp_num_procs_x = i 
1247+                                 tmp_num_procs_y = num_procs/i 
12501248
1251-                                 if (mod(num_procs/i, j) == 0 & 
1249+                                 if (fct_min >= abs((m + 1)/tmp_num_procs_x & 
1250+                                                    - (n + 1)/tmp_num_procs_y) & 
12521251                                    .and. & 
1253-                                     (n + 1)/j >= num_stcls_min*recon_order) then 
1252+                                     (n + 1)/tmp_num_procs_y & 
1253+                                     >= & 
1254+                                     num_stcls_min*recon_order) then 
12541255
1255-                                     tmp_num_procs_x = i 
1256-                                     tmp_num_procs_y = j 
1257-                                     tmp_num_procs_z = num_procs/(i*j) 
1256+                                     num_procs_x = i 
1257+                                     num_procs_y = num_procs/i 
1258+                                     fct_min = abs((m + 1)/tmp_num_procs_x & 
1259+                                                   - (n + 1)/tmp_num_procs_y) 
1260+                                     ierr = 0 
12581261
1259-                                     if (fct_min >= abs((m + 1)/tmp_num_procs_x & 
1260-                                                        - (n + 1)/tmp_num_procs_y) & 
1261-                                         + abs((n + 1)/tmp_num_procs_y & 
1262-                                               - (p + 1)/tmp_num_procs_z) & 
1262+                                 end if 
1263+ 
1264+                             end if 
1265+ 
1266+                         end do 
1267+ 
1268+                     else 
1269+ 
1270+                         ! Initial estimate of optimal processor topology 
1271+                         num_procs_x = 1 
1272+                         num_procs_y = 1 
1273+                         num_procs_z = num_procs 
1274+                         ierr = -1 
1275+ 
1276+                         ! Benchmarking the quality of this initial guess 
1277+                         tmp_num_procs_x = num_procs_x 
1278+                         tmp_num_procs_y = num_procs_y 
1279+                         tmp_num_procs_z = num_procs_z 
1280+                         fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & 
1281+                                              - (n + 1)/tmp_num_procs_y) & 
1282+                                   + 10._wp*abs((n + 1)/tmp_num_procs_y & 
1283+                                                - (p + 1)/tmp_num_procs_z) 
1284+ 
1285+                         ! Optimization of the initial processor topology 
1286+                         do i = 1, num_procs 
1287+ 
1288+                             if (mod(num_procs, i) == 0 & 
1289+                                 .and. & 
1290+                                 (m + 1)/i >= num_stcls_min*recon_order) then 
1291+ 
1292+                                 do j = 1, num_procs/i 
1293+ 
1294+                                     if (mod(num_procs/i, j) == 0 & 
12631295                                        .and. & 
1264-                                         (p + 1)/tmp_num_procs_z & 
1265-                                         >= & 
1266-                                         num_stcls_min*recon_order) & 
1267-                                         then 
1268- 
1269-                                         num_procs_x = i 
1270-                                         num_procs_y = j 
1271-                                         num_procs_z = num_procs/(i*j) 
1272-                                         fct_min = abs((m + 1)/tmp_num_procs_x & 
1273-                                                       - (n + 1)/tmp_num_procs_y) & 
1274-                                                   + abs((n + 1)/tmp_num_procs_y & 
1275-                                                         - (p + 1)/tmp_num_procs_z) 
1276-                                         ierr = 0 
1296+                                         (n + 1)/j >= num_stcls_min*recon_order) then 
1297+ 
1298+                                         tmp_num_procs_x = i 
1299+                                         tmp_num_procs_y = j 
1300+                                         tmp_num_procs_z = num_procs/(i*j) 
1301+ 
1302+                                         if (fct_min >= abs((m + 1)/tmp_num_procs_x & 
1303+                                                            - (n + 1)/tmp_num_procs_y) & 
1304+                                             + abs((n + 1)/tmp_num_procs_y & 
1305+                                                   - (p + 1)/tmp_num_procs_z) & 
1306+                                             .and. & 
1307+                                             (p + 1)/tmp_num_procs_z & 
1308+                                             >= & 
1309+                                             num_stcls_min*recon_order) & 
1310+                                             then 
1311+ 
1312+                                             num_procs_x = i 
1313+                                             num_procs_y = j 
1314+                                             num_procs_z = num_procs/(i*j) 
1315+                                             fct_min = abs((m + 1)/tmp_num_procs_x & 
1316+                                                           - (n + 1)/tmp_num_procs_y) & 
1317+                                                       + abs((n + 1)/tmp_num_procs_y & 
1318+                                                             - (p + 1)/tmp_num_procs_z) 
1319+                                             ierr = 0 
1320+ 
1321+                                         end if 
12771322
12781323                                    end if 
12791324
1280-                                 end if 
1281- 
1282-                             end do 
1325+                                 end do 
12831326
1284-                         end if 
1327+                              end if 
12851328
1286-                     end do 
1329+                          end do 
12871330
1331+                     end if 
12881332                end if 
12891333
12901334                ! Verifying that a valid decomposition of the computational 
0 commit comments