Skip to content

Commit b6d94be

Browse files
committed
fix #77
1 parent 149e29e commit b6d94be

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

src/linalg.jl

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,14 +42,15 @@ function transpose_blocks!(
4242

4343
for j = UInt32(0):UInt32(3)
4444
j0 = j * BLOCK_ROWS
45-
tile[tidx_x + ui1, tidx_y + j0 + ui1] = idata[y + j0, x]
45+
@inbounds tile[tidx_x + ui1, tidx_y + j0 + ui1] = idata[y + j0, x]
4646
end
4747

4848
synchronize_threads(state)
4949
for j = UInt32(0):UInt32(3)
5050
j0 = j * BLOCK_ROWS
51-
odata[x, y + j0] = tile[tidx_x + ui1, tidx_y + j0 + ui1]
51+
@inbounds odata[x, y + j0] = tile[tidx_x + ui1, tidx_y + j0 + ui1]
5252
end
53+
5354
return
5455
end
5556

@@ -84,8 +85,8 @@ end
8485
function permutedims!(dest::GPUArray, src::GPUArray, perm)
8586
perm = UInt32.((perm...,))
8687
gpu_call(dest, (dest, src, perm)) do state, dest, src, perm
87-
I = @cartesianidx dest state
88-
@inbounds dest[I...] = src[genperm(I, perm)...]
88+
I = @cartesianidx src state
89+
@inbounds dest[genperm(I, perm)...] = src[I...]
8990
return
9091
end
9192
return dest

src/testsuite/linalg.jl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ function run_linalg(Typ)
1010
@testset "PermuteDims" begin
1111
against_base(x -> permutedims(x, (2, 1)), T, (2, 3))
1212
against_base(x -> permutedims(x, (2, 1, 3)), T, (4, 5, 6))
13+
against_base(x -> permutedims(x, (3, 1, 2)), T, (4, 5, 6))
1314
end
1415
end
1516
end

0 commit comments

Comments
 (0)