@@ -25,45 +25,60 @@ linear index in a GPU kernel (equal to OpenCL.get_global_id)
25
25
@inline function linear_index(state)
    # Offset contributed by the blocks preceding this one; the block index is
    # shifted down by one before scaling by the block size.
    block_offset = (blockidx_x(state) - UInt32(1)) * blockdim_x(state)
    # Add the thread's position inside its block and normalise to UInt32.
    return UInt32(block_offset + threadidx_x(state))
end
28
+
29
+ """
30
+ Macro form of `linear_index`, which returns from the enclosing function when the index is out of bounds for `A`
31
+ """
32
macro linearidx(A, statesym = :state)
    # Both arguments are escaped so they resolve in the caller's scope; the
    # locals introduced inside the quote stay hygienic.
    array_expr = esc(A)
    state_expr = esc(statesym)
    quote
        arr = $array_expr
        idx = linear_index($state_expr)
        # Bail out of the enclosing function when the index is past the end of `arr`.
        if idx > length(arr)
            return
        end
        idx
    end
end
40
+
41
+
42
+ """
43
+ Like `@linearidx`, but returns an N-dimensional `NTuple{ndims(A), Cuint}` as the index
44
+ """
45
macro cartesianidx(A, statesym = :state)
    quote
        # Evaluate the array expression once and reuse it for both steps.
        arr = $(esc(A))
        # `@linearidx` returns from the enclosing function when out of bounds,
        # so the conversion below only runs for valid indices.
        lin = @linearidx(arr, $(esc(statesym)))
        gpu_ind2sub(arr, lin)
    end
end
52
+
53
+ """
54
+ Global size == blockdim * griddim == total number of kernel executions
55
+ """
28
56
@inline function global_size(state)
    # TODO nd version
    # Only the x dimension is taken into account: grid size times block size.
    grid_extent = griddim_x(state)
    block_extent = blockdim_x(state)
    return grid_extent * block_extent
end
31
60
61
+
32
62
"""
33
- Blocks until all operations are finished on `A`
63
+ Gets the device associated to the Array `A`
34
64
"""
35
- function synchronize (A:: AbstractArray )
65
function device(A::AbstractArray)
    # Generic fallback for any AbstractArray: no device is looked up and
    # `nothing` is returned. Having this method makes it easier to write
    # generic code that also works for plain AbstractArrays.
    return nothing
end
39
69
"""
40
- Gets the device associated to the Array `A`
70
+ Blocks until all operations are finished on `A`
41
71
"""
42
- function device (A:: AbstractArray )
72
function synchronize(A::AbstractArray)
    # Generic fallback: a no-op for backends that do not need synchronization.
    # Having this method makes it easier to write generic code that also works
    # for plain AbstractArrays.
    return nothing
end
46
-
47
76
#
48
77
# @inline function synchronize_threads(state)
49
78
# CUDAnative.__syncthreads()
50
79
# end
51
80
52
- macro linearidx (A, statesym = :state )
53
- quote
54
- x1 = $ (esc (A))
55
- i1 = linear_index ($ (esc (statesym)))
56
- i1 > length (x1) && return
57
- i1
58
- end
59
- end
60
- macro cartesianidx (A, statesym = :state )
61
- quote
62
- x = $ (esc (A))
63
- i2 = @linearidx (x, $ (esc (statesym)))
64
- gpu_ind2sub (x, i2)
65
- end
66
- end
81
+
67
82
68
83
69
84
"""
0 commit comments