Skip to content

Commit 37151dc

Browse files
committed
use new LocalMemory
1 parent 477172a commit 37151dc

File tree

1 file changed, with 6 additions and 4 deletions.

src/linalg.jl

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,15 @@
 function transpose_kernel!(
-state, At, A, width, height, A_local, ::Val{BLOCK}
-) where BLOCK
+state, At, A::AbstractArray{T}, width, height, ::Val{BLOCK}, ::Val{LMem}
+) where {BLOCK, LMem, T}
 
 ui1 = UInt32(1)
 bidx_x = blockidx_x(state) - ui1
 bidx_y = blockidx_y(state) - ui1
 tidx_x = threadidx_x(state) - ui1
 tidx_y = threadidx_y(state) - ui1
 
+A_local = LocalMemory(state, T, LMem)
+
 base_idx_a = bidx_x * BLOCK + bidx_y * (BLOCK * width)
 base_idx_a_t = bidx_y * BLOCK + bidx_x * (BLOCK * height)
 
@@ -31,8 +33,8 @@ function Base.transpose!{T}(At::GPUArray{T, 2}, A::GPUArray{T, 2})
 dev = GPUArrays.device(A)
 block_size = max_block_size(dev, size(A)...)
 outsize = UInt32.(size(At))
-lmem = GPUArrays.LocalMemory{T}(block_size * (block_size + 1))
-args = (At, A, outsize..., lmem, Val{block_size}())
+lmem = block_size * (block_size + 1)
+args = (At, A, outsize..., Val{block_size}(), Val{lmem}())
 gpu_call(transpose_kernel!, At, args, (block_size, block_size))
 At
 end

0 commit comments

Comments (0)