@@ -39,16 +39,19 @@ module coefs
3939 use dofmap, only : dofmap_t
4040 use space, only: space_t
4141 use math, only : rone, invcol1, addcol3, subcol3, copy, &
42- chsign, rzero, invers2, glsum, absval, NEKO_EPS
42+ chsign, rzero, invers2, glsum, NEKO_EPS
4343 use mesh, only : mesh_t
4444 use device_math, only : device_rone, device_invcol1, &
45- device_glsum, device_absval
45+ device_glsum
4646 use device_coef, only : device_coef_generate_geo, &
4747 device_coef_generate_dxydrst
4848 use mxm_wrapper, only : mxm
4949 use device
5050 use utils, only : index_is_on_facet, linear_index, &
5151 neko_error
52+ use comm, only : NEKO_COMM
53+ use neko_config, only : NEKO_BCKND_DEVICE
54+ use mpi_f08, only : MPI_Allreduce, MPI_INTEGER, MPI_SUM
5255 use , intrinsic :: iso_c_binding
5356 implicit none
5457 private
@@ -161,7 +164,7 @@ module coefs
161164 procedure , pass(this) :: free = > coef_free
162165 procedure , pass(this) :: get_normal = > coef_get_normal
163166 procedure , pass(this) :: get_area = > coef_get_area
164- procedure , pass(this) :: check_cyclic = > coef_check_cyclic
167+ procedure , pass(this) :: generate_cyclic_bc = > coef_generate_cyclic_bc
165168 generic :: init = > init_empty, init_all
166169 end type coef_t
167170
@@ -1220,34 +1223,46 @@ subroutine coef_generate_area_and_normal(coef)
12201223
12211224 end subroutine coef_generate_area_and_normal
12221225
1223- subroutine coef_generate_cyclic_bc (coef )
1224- type (coef_t), intent (inout ) :: coef
1226+ subroutine coef_generate_cyclic_bc (this )
1227+ class (coef_t), intent (inout ) :: this
12251228 real (kind= rp) :: un(3 ), len, d
1226- integer :: lx, ly, lz, np
1229+ integer :: lx, ly, lz, np, np_glb, ierr
12271230 integer :: i, j, k, pf, pe, n, nc, ncyc
12281231
1229- np = coef% msh% periodic% size
1230- lx = coef% Xh% lx
1231- ly = coef% Xh% ly
1232- lz = coef% Xh% lz
1233- ncyc = coef% cyc_msk(0 ) - 1
1232+ if (.not. this% cyclic) return
1233+
1234+ np = this% msh% periodic% size
1235+ call MPI_Allreduce(np, np_glb, 1 , &
1236+ MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
1237+
1238+ if (np_glb .eq. 0 ) then
1239+ call neko_error(" There are no periodic boundaries. " // &
1240+ " Switch cyclic off in the case file." )
1241+ end if
1242+
1243+ if (np .eq. 0 ) return
1244+
1245+ lx = this% Xh% lx
1246+ ly = this% Xh% ly
1247+ lz = this% Xh% lz
1248+ ncyc = this% cyc_msk(0 ) - 1
12341249 nc = 1
12351250 do n = 1 , np
1236- pf = coef % msh% periodic% facet_el(n)% x(1 )
1237- pe = coef % msh% periodic% facet_el(n)% x(2 )
1251+ pf = this % msh% periodic% facet_el(n)% x(1 )
1252+ pe = this % msh% periodic% facet_el(n)% x(2 )
12381253 do k = 1 , lz
12391254 do j = 1 , ly
12401255 do i = 1 , lx
12411256 if (index_is_on_facet(i, j, k, lx, ly, lz, pf)) then
1242- un = coef % get_normal(i, j, k, pe, pf)
1257+ un = this % get_normal(i, j, k, pe, pf)
12431258 len = sqrt (un(1 ) * un(1 ) + un(2 ) * un(2 ))
12441259 if (len .gt. NEKO_EPS) then
1245- d = coef % dof% y(i, j, k, pe) * un(1 ) &
1246- - coef % dof% x(i, j, k, pe) * un(2 )
1260+ d = this % dof% y(i, j, k, pe) * un(1 ) &
1261+ - this % dof% x(i, j, k, pe) * un(2 )
12471262
1248- coef % cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1249- coef % R11(nc) = un(1 ) / len * sign (1.0_rp , d)
1250- coef % R12(nc) = un(2 ) / len * sign (1.0_rp , d)
1263+ this % cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1264+ this % R11(nc) = un(1 ) / len * sign (1.0_rp , d)
1265+ this % R12(nc) = un(2 ) / len * sign (1.0_rp , d)
12511266 nc = nc + 1
12521267 else
12531268 call neko_error(" x and y components of surface " // &
@@ -1266,142 +1281,11 @@ subroutine coef_generate_cyclic_bc(coef)
12661281 end if
12671282
12681283 if (NEKO_BCKND_DEVICE .eq. 1 ) then
1269- call device_memcpy(coef % cyc_msk, coef % cyc_msk_d, ncyc+1 , HOST_TO_DEVICE, sync= .false. )
1270- call device_memcpy(coef % R11, coef % R11_d, ncyc, HOST_TO_DEVICE, sync= .false. )
1271- call device_memcpy(coef % R12, coef % R12_d, ncyc, HOST_TO_DEVICE, sync= .false. )
1284+ call device_memcpy(this % cyc_msk, this % cyc_msk_d, ncyc+1 , HOST_TO_DEVICE, sync= .false. )
1285+ call device_memcpy(this % R11, this % R11_d, ncyc, HOST_TO_DEVICE, sync= .false. )
1286+ call device_memcpy(this % R12, this % R12_d, ncyc, HOST_TO_DEVICE, sync= .false. )
12721287 end if
12731288
12741289 end subroutine coef_generate_cyclic_bc
12751290
1276-
1277- subroutine coef_check_cyclic (this )
1278- class(coef_t), intent (inout ) :: this
1279- ! DSS is applied on vector field "norm" with all zeros
1280- ! except values equal to surface normals at periodic
1281- ! faces. The results must sum to zero
1282- ! Check happens in two passes -
1283- ! >ipass = 1 : DSS in Cartesian coordinates. If it yields 0,
1284- ! no cyclic rotation is required and must be switched off.
1285- ! >ipass = 2 : DSS in TNB basis (rotation around z-axis)
1286- ! If yields 0, cyclic rotation is required and must be
1287- ! switched on. If not, then the current rotation logic
1288- ! is not sufficient and must be modified.
1289- ! "cyclic": true in case file invokes these checks.
1290- integer :: np, n, lx, pf, pe, i, j, k, nc, ipass, ntot, ncyc
1291- real (kind= rp) :: un(3 )
1292- real (kind= rp), allocatable :: normx(:,:,:,:)
1293- real (kind= rp), allocatable :: normy(:,:,:,:)
1294- real (kind= rp), allocatable :: normz(:,:,:,:)
1295- type (c_ptr) :: normx_d = C_NULL_PTR
1296- type (c_ptr) :: normy_d = C_NULL_PTR
1297- type (c_ptr) :: normz_d = C_NULL_PTR
1298-
1299- logical :: norm_dss = .false.
1300-
1301- np = this% msh% periodic% size
1302- lx = this% Xh% lx
1303- ntot = this% dof% size ()
1304- ncyc = np* lx* lx
1305-
1306- if (.not. this% cyclic) return
1307-
1308- if (np .eq. 0 ) then
1309- call neko_error(" There are no periodic boundaries. " // &
1310- " Switch cyclic off in the case file." )
1311- end if
1312-
1313- allocate (normx(lx, lx, lx, this% msh% nelv))
1314- allocate (normy(lx, lx, lx, this% msh% nelv))
1315- allocate (normz(lx, lx, lx, this% msh% nelv))
1316-
1317- if (NEKO_BCKND_DEVICE .eq. 1 ) then
1318- call device_map(normx, normx_d, ntot)
1319- call device_map(normy, normy_d, ntot)
1320- call device_map(normz, normz_d, ntot)
1321- end if
1322-
1323- do ipass = 1 , 2
1324- norm_dss = .false.
1325- call rzero(normx, ntot)
1326- call rzero(normy, ntot)
1327- call rzero(normz, ntot)
1328-
1329- if (ipass .eq. 2 ) then
1330- call coef_generate_cyclic_bc(this)
1331- end if
1332-
1333- nc = 1
1334- do n = 1 , np
1335- pf = this% msh% periodic% facet_el(n)% x(1 )
1336- pe = this% msh% periodic% facet_el(n)% x(2 )
1337- do k = 1 , lx
1338- do j = 1 , lx
1339- do i = 1 , lx
1340- if (index_is_on_facet(i, j, k, lx, lx, lx, pf)) then
1341- un = this% get_normal(i, j, k, pe, pf)
1342- normx(i,j,k,pe) = un(1 ) * this% R11(nc) &
1343- + un(2 ) * this% R12(nc)
1344-
1345- normy(i,j,k,pe) =- un(1 ) * this% R12(nc) &
1346- + un(2 ) * this% R11(nc)
1347-
1348- normz(i,j,k,pe) = un(3 )
1349-
1350- nc = nc + 1
1351- end if
1352- end do
1353- end do
1354- end do
1355- end do
1356-
1357- if (NEKO_BCKND_DEVICE .eq. 1 ) then
1358- call device_memcpy(normx, normx_d, ntot, HOST_TO_DEVICE, sync= .false. )
1359- call device_memcpy(normy, normy_d, ntot, HOST_TO_DEVICE, sync= .false. )
1360- call device_memcpy(normz, normz_d, ntot, HOST_TO_DEVICE, sync= .false. )
1361- end if
1362-
1363- call this% gs_h% op(normx, ntot, GS_OP_ADD)
1364- call this% gs_h% op(normy, ntot, GS_OP_ADD)
1365- call this% gs_h% op(normz, ntot, GS_OP_ADD)
1366-
1367- if (NEKO_BCKND_DEVICE .eq. 1 ) then
1368- call device_absval(normx_d, ntot)
1369- call device_absval(normy_d, ntot)
1370- call device_absval(normz_d, ntot)
1371- norm_dss = device_glsum(normx_d, ntot)/ ntot .lt. 100.0 * NEKO_EPS .and. &
1372- device_glsum(normy_d, ntot)/ ntot .lt. 100.0 * NEKO_EPS .and. &
1373- device_glsum(normz_d, ntot)/ ntot .lt. 100.0 * NEKO_EPS
1374- else
1375- call absval(normx, ntot)
1376- call absval(normy, ntot)
1377- call absval(normz, ntot)
1378- norm_dss = glsum(normx, ntot)/ ntot .lt. 100 * NEKO_EPS .and. &
1379- glsum(normy, ntot)/ ntot .lt. 100 * NEKO_EPS .and. &
1380- glsum(normz, ntot)/ ntot .lt. 100 * NEKO_EPS
1381- end if
1382-
1383- if (ipass .eq. 1 .and. norm_dss) then
1384- call neko_error(" Cyclic rotation is not required. " // &
1385- " Switch it off in case file." )
1386- ! else if (ipass .eq. 1 .and. norm_dss .eqv. false)
1387- ! wait for ipass=2 to check if current rotation logic is sufficient
1388- else if (ipass .eq. 2 .and. .not. norm_dss) then
1389- call neko_error(" Cylic rotation is required, but " // &
1390- " rotation logic must be modified." )
1391- ! if (ipass .eq. 2 .and. norm_dss)
1392- ! current logic is sufficient, proceed.
1393- end if
1394- end do
1395- deallocate (normx)
1396- deallocate (normy)
1397- deallocate (normz)
1398-
1399- if (NEKO_BCKND_DEVICE .eq. 1 ) then
1400- call device_free(normx_d)
1401- call device_free(normy_d)
1402- call device_free(normz_d)
1403- end if
1404-
1405- end subroutine coef_check_cyclic
1406-
14071291end module coefs
0 commit comments