Skip to content

Commit b51001e

Browse files
authored
Merge pull request #203 from JuliaGPU/tb/avoid_string_interpolation
Hack around GPU incompatibility of Broadcast._bcs1.
2 parents b3eabf8 + 7ae5d08 commit b51001e

File tree

1 file changed

+7 -5 lines changed

1 file changed

+7 -5 lines changed

src/mapreduce.jl

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -131,14 +131,16 @@ for i = 0:10
131131
fargs = ntuple(x-> :(simple_broadcast_index($(args[x]), cartesian_global_index...)), i)
132132
@eval begin
133133
# http://developer.amd.com/resources/articles-whitepapers/opencl-optimization-case-study-simple-reductions/
134-
function reduce_kernel(state, f, op, v0::T, A, ::Val{LMEM}, result, $(args...)) where {T, LMEM}
134+
function reduce_kernel(state, f, op, v0::T, A, len, ax, ::Val{LMEM}, result, $(args...)) where {T, LMEM}
135135
tmp_local = @LocalMemory(state, T, LMEM)
136136
global_index = linear_index(state)
137137
acc = v0
138138
# # Loop sequentially over chunks of input vector
139-
while global_index <= length(A)
140-
cartesian_global_index = Tuple(CartesianIndices(axes(A))[global_index])
141-
element = f(A[cartesian_global_index...], $(fargs...))
139+
# HACK: length(A) and axes(A) aren't GPU compatible, so pass them instead
140+
# https://github.com/JuliaGPU/CUDAnative.jl/issues/367
141+
while global_index <= len
142+
cartesian_global_index = Tuple(CartesianIndices(ax)[global_index])
143+
@inbounds element = f(A[cartesian_global_index...], $(fargs...))
142144
acc = op(acc, element)
143145
global_index += global_size(state)
144146
end
@@ -182,7 +184,7 @@ function acc_mapreduce(f, op, v0::OT, A::GPUSrcArray, rest::Tuple) where {OT}
182184
end
183185
out = similar(A, OT, (blocksize,))
184186
fill!(out, v0)
185-
args = (f, op, v0, A, Val{threads}(), out, rest...)
187+
args = (f, op, v0, A, length(A), axes(A), Val{threads}(), out, rest...)
186188
gpu_call(reduce_kernel, out, args, ((blocksize,), (threads,)))
187189
reduce(op, Array(out))
188190
end

0 commit comments

Comments
 (0)