Skip to content

Commit f0b09ad

Browse files
authored
Fixing transpose function, a missing "end" and some argument orders. (#487)
1 parent 1ff9a26 commit f0b09ad

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

examples/performance.jl

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ end
2121

2222
@kernel function simple_transpose_kernel!(output, @Const(input))
2323
I, J = @index(Global, NTuple)
24-
@inbounds output[I, J] = input[I, J]
24+
@inbounds output[J, I] = input[I, J]
2525
end
2626

2727
# Local memory variants
@@ -141,8 +141,10 @@ for block_dims in ((TILE_DIM, TILE_DIM), (TILE_DIM*TILE_DIM, 1), (1, TILE_DIM*TI
141141
output = similar(input)
142142

143143
# compile kernel
144-
kernel(input, output, ndrange=size(output))
144+
kernel(output, input, ndrange=size(output))
145145
for rep in 1:nreps
146+
kernel(output, input, ndrange=size(output))
147+
end
146148
KernelAbstractions.synchronize(backend)
147149
end
148150
end
@@ -159,9 +161,9 @@ for (name, kernel) in (
159161
output = similar(input)
160162

161163
# compile kernel
162-
kernel(input, output, Val(Int(bank)), ndrange=size(output))
164+
kernel(output, input, Val(Int(bank)), ndrange=size(output))
163165
for rep in 1:nreps
164-
kernel(input, output, Val(Int(bank)), ndrange=size(output))
166+
kernel(output, input, Val(Int(bank)), ndrange=size(output))
165167
end
166168
KernelAbstractions.synchronize(backend)
167169
end
@@ -185,9 +187,9 @@ for (name, kernel) in (
185187
ndrange = (N, div(N, block_factor))
186188

187189
# compile kernel
188-
kernel(input, output, Val(Int(bank)), ndrange=ndrange)
190+
kernel(output, input, Val(Int(bank)), ndrange=ndrange)
189191
for rep in 1:nreps
190-
kernel(input, output, Val(Int(bank)), ndrange=ndrange)
192+
kernel(output, input, Val(Int(bank)), ndrange=ndrange)
191193
end
192194
KernelAbstractions.synchronize(backend)
193195
end

0 commit comments

Comments
 (0)