Skip to content

Commit 7051855

Browse files
author
Daniel Vickers
committed
This solves the issue with some slow atomic updates. I am going to speed this up now with reductions.
1 parent 2eedfe6 commit 7051855

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

src/simulation/m_ibm.fpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,16 +1001,16 @@ contains
10011001
10021002
real(wp), dimension(0:m, 0:n, 0:p), intent(in) :: pressure
10031003
1004-
integer :: i, j, k, ib_idx
1004+
integer :: i, j, k, l, ib_idx
10051005
real(wp), dimension(num_ibs, 3) :: forces, torques
1006-
real(wp), dimension(1:3) :: pressure_divergence, radial_vector
1006+
real(wp), dimension(1:3) :: pressure_divergence, radial_vector, temp_torque_vector
10071007
real(wp) :: cell_volume, dx, dy, dz
10081008
10091009
forces = 0._wp
10101010
torques = 0._wp
10111011
10121012
! TODO :: This is currently only valid for inviscid flow, and needs to be extended to include viscosity
1013-
$:GPU_PARALLEL_LOOP(private='[ib_idx,radial_vector,pressure_divergence,cell_volume, dx, dy, dz]', copy='[forces,torques]', copyin='[ib_markers]', collapse=3)
1013+
$:GPU_PARALLEL_LOOP(private='[ib_idx,radial_vector,pressure_divergence,cell_volume,temp_torque_vector, dx, dy, dz]', copy='[forces,torques]', copyin='[ib_markers]', collapse=3)
10141014
do i = 0, m
10151015
do j = 0, n
10161016
do k = 0, p
@@ -1041,10 +1041,13 @@ contains
10411041
end if
10421042
10431043
! Update the force values atomically to prevent race conditions
1044-
$:GPU_ATOMIC(atomic='update')
1045-
forces(ib_idx, :) = forces(ib_idx, :) - (pressure_divergence*cell_volume)
1046-
$:GPU_ATOMIC(atomic='update')
1047-
torques(ib_idx, :) = torques(ib_idx, :) - (cross_product(radial_vector, pressure_divergence)*cell_volume)
1044+
temp_torque_vector = cross_product(radial_vector, pressure_divergence)*cell_volume ! separate out to make atomics safe
1045+
do l = 1, 3
1046+
$:GPU_ATOMIC(atomic='update')
1047+
forces(ib_idx, l) = forces(ib_idx, l) - (pressure_divergence(l)*cell_volume)
1048+
$:GPU_ATOMIC(atomic='update')
1049+
torques(ib_idx, l) = torques(ib_idx, l) - temp_torque_vector(l)
1050+
end do
10481051
end if
10491052
end if
10501053
end do

0 commit comments

Comments
 (0)