Skip to content

Commit 932df13

Browse files
committed
fix: Minor changes.
1 parent 33ef903 commit 932df13

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

src/cuda/backend.f90

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
module m_cuda_backend
2+
use iso_fortran_env, only: stderr => error_unit
23
use cudafor
34

45
use m_allocator, only: allocator_t, field_t
@@ -452,12 +453,10 @@ subroutine reorder_cuda(self, u_o, u_i, direction)
452453
case (RDR_Z2Y) ! z2y
453454
blocks = dim3(self%nx_loc/SZ, self%ny_loc/SZ, self%nz_loc)
454455
threads = dim3(SZ, SZ, 1)
455-
456456
call reorder_z2y<<<blocks, threads>>>(u_o_d, u_i_d, &
457457
self%nx_loc, self%nz_loc)
458458
case default
459-
print *, 'Transpose direction is undefined.'
460-
stop
459+
error stop 'Reorder direction is undefined.'
461460
end select
462461

463462
end subroutine reorder_cuda

src/cuda/kernels_reorder.f90

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,13 @@ attributes(global) subroutine reorder_x2z(u_z, u_x, nz)
3636
real(dp), device, intent(in), dimension(:, :, :) :: u_x
3737
integer, value, intent(in) :: nz
3838

39-
integer :: i, j, b_i, b_j, nx!, nz
39+
integer :: i, j, b_i, b_j, nx
4040

4141
i = threadIdx%x; b_i = blockIdx%x; b_j = blockIdx%y
4242
nx = gridDim%x
4343

44+
! The data access pattern for reordering between x and z is already
45+
! favourable (both u_x and u_z are indexed by the thread index i in their
! leading dimension), so shared memory is not needed for this operation.
4446
do j = 1, nz
4547
u_z(i, j, b_i + (b_j - 1)*nx) = u_x(i, b_i, j + (b_j - 1)*nz)
4648
end do

0 commit comments

Comments
 (0)