Skip to content

Commit a97f7c4

Browse files
authored
Merge pull request xcompact3d#19 from Nanoseb/omp-tridiag-kernel
Add tridiagonal solver to omp backend
2 parents b464e88 + ac26638 commit a97f7c4

File tree

5 files changed

+152
-106
lines changed

5 files changed

+152
-106
lines changed

src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ set(SRC
88
omp/backend.f90
99
omp/common.f90
1010
omp/kernels_dist.f90
11+
omp/sendrecv.f90
12+
omp/exec_dist.f90
1113
)
1214
set(CUDASRC
1315
cuda/backend.f90

src/omp/exec_dist.f90

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
module m_omp_exec_dist
2+
use mpi
3+
4+
use m_common, only: dp
5+
use m_omp_common, only: SZ
6+
use m_omp_kernels_dist, only: der_univ_dist, der_univ_subs
7+
use m_tdsops, only: tdsops_t
8+
use m_omp_sendrecv, only: sendrecv_fields
9+
10+
implicit none
11+
12+
contains
13+
14+
subroutine exec_dist_tds_compact( &
15+
du, u, u_recv_s, u_recv_e, du_send_s, du_send_e, du_recv_s, du_recv_e, &
16+
tdsops, nproc, pprev, pnext, n_block &
17+
)
18+
implicit none
19+
20+
! du = d(u)
21+
real(dp), dimension(:, :, :), intent(out) :: du
22+
real(dp), dimension(:, :, :), intent(in) :: u, u_recv_s, u_recv_e
23+
24+
! The ones below are intent(out) just so that we can write data in them,
25+
! not because we actually need the data they store later where this
26+
! subroutine is called. We absolutely don't care about the data they pass back
27+
real(dp), dimension(:, :, :), intent(out) :: &
28+
du_send_s, du_send_e, du_recv_s, du_recv_e
29+
30+
type(tdsops_t), intent(in) :: tdsops
31+
integer, intent(in) :: nproc, pprev, pnext
32+
integer, intent(in) :: n_block
33+
34+
integer :: n_data
35+
integer :: k
36+
37+
n_data = SZ*n_block
38+
39+
!$omp parallel do
40+
do k = 1, n_block
41+
call der_univ_dist( &
42+
du(:, :, k), du_send_s(:, :, k), du_send_e(:, :, k), u(:, :, k), &
43+
u_recv_s(:, :, k), u_recv_e(:, :, k), &
44+
tdsops%coeffs_s, tdsops%coeffs_e, tdsops%coeffs, tdsops%n, &
45+
tdsops%dist_fw, tdsops%dist_bw, tdsops%dist_af &
46+
)
47+
end do
48+
!$omp end parallel do
49+
50+
! halo exchange for 2x2 systems
51+
call sendrecv_fields(du_recv_s, du_recv_e, du_send_s, du_send_e, &
52+
n_data, nproc, pprev, pnext)
53+
54+
!$omp parallel do
55+
do k = 1, n_block
56+
call der_univ_subs(du(:, :, k), &
57+
du_recv_s(:, :, k), du_recv_e(:, :, k), &
58+
tdsops%n, tdsops%dist_sa, tdsops%dist_sc)
59+
end do
60+
!$omp end parallel do
61+
62+
end subroutine exec_dist_tds_compact
63+
64+
end module m_omp_exec_dist
65+

src/omp/kernels_dist.f90

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ module m_omp_kernels_dist
88

99
contains
1010

11-
subroutine der_univ_dist_omp( &
11+
subroutine der_univ_dist( &
1212
du, send_u_s, send_u_e, u, u_s, u_e, coeffs_s, coeffs_e, coeffs, n, &
1313
ffr, fbc, faf &
1414
)
@@ -134,9 +134,9 @@ subroutine der_univ_dist_omp( &
134134
end do
135135
!$omp end simd
136136

137-
end subroutine der_univ_dist_omp
137+
end subroutine der_univ_dist
138138

139-
subroutine der_univ_subs_omp(du, recv_u_s, recv_u_e, n, dist_sa, dist_sc)
139+
subroutine der_univ_subs(du, recv_u_s, recv_u_e, n, dist_sa, dist_sc)
140140
implicit none
141141

142142
! Arguments
@@ -193,6 +193,6 @@ subroutine der_univ_subs_omp(du, recv_u_s, recv_u_e, n, dist_sa, dist_sc)
193193
end do
194194
!$omp end simd
195195

196-
end subroutine der_univ_subs_omp
196+
end subroutine der_univ_subs
197197

198198
end module m_omp_kernels_dist

src/omp/sendrecv.f90

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
module m_omp_sendrecv
2+
use mpi
3+
4+
use m_common, only: dp
5+
6+
implicit none
7+
8+
contains
9+
10+
subroutine sendrecv_fields(f_recv_s, f_recv_e, f_send_s, f_send_e, &
11+
n_data, nproc, prev, next)
12+
implicit none
13+
14+
real(dp), dimension(:, :, :), intent(out) :: f_recv_s, f_recv_e
15+
real(dp), dimension(:, :, :), intent(in) :: f_send_s, f_send_e
16+
integer, intent(in) :: n_data, nproc, prev, next
17+
18+
integer :: req(4), err(4), ierr, tag = 1234
19+
20+
if (nproc == 1) then
21+
f_recv_s = f_send_e
22+
f_recv_e = f_send_s
23+
else
24+
call MPI_Isend(f_send_s, n_data, MPI_DOUBLE_PRECISION, &
25+
prev, tag, MPI_COMM_WORLD, req(1), err(1))
26+
call MPI_Irecv(f_recv_e, n_data, MPI_DOUBLE_PRECISION, &
27+
next, tag, MPI_COMM_WORLD, req(2), err(2))
28+
call MPI_Isend(f_send_e, n_data, MPI_DOUBLE_PRECISION, &
29+
next, tag, MPI_COMM_WORLD, req(3), err(3))
30+
call MPI_Irecv(f_recv_s, n_data, MPI_DOUBLE_PRECISION, &
31+
prev, tag, MPI_COMM_WORLD, req(4), err(4))
32+
33+
call MPI_Waitall(4, req, MPI_STATUSES_IGNORE, ierr)
34+
end if
35+
36+
end subroutine sendrecv_fields
37+
38+
end module m_omp_sendrecv

0 commit comments

Comments
 (0)