# Benchmark problem sizes: a 512×1000 Float32 matrix, and a "long" 3×1_000_000
# matrix with few rows and many columns, to exercise dimension-wise kernels
# along both the cheap and the expensive axis.
const m = 512
const n = 1000
const m_long = 3
const n_long = 1_000_000

# generate some arrays
cpu_mat = rand(rng, Float32, m, n)
# initialized from the host data (rather than `undef`) so value-dependent
# kernels see realistic inputs
gpu_mat = CuArray{Float32}(cpu_mat)
gpu_mat_long = CuArray{Float32}(rand(rng, Float32, m_long, n_long))
gpu_vec = reshape(gpu_mat, length(gpu_mat))
gpu_arr_3d = reshape(gpu_mat, (m, 40, 25))
gpu_arr_4d = reshape(gpu_mat, (m, 10, 10, 10))
# small value range (-10:10) keeps integer accumulations over the long
# arrays from overflowing
gpu_mat_ints = CuArray(rand(rng, -10:10, m, n))
gpu_mat_long_ints = CuArray(rand(rng, -10:10, m_long, n_long))
gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
gpu_mat_bools = CuArray(rand(rng, Bool, m, n))
gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))
# no need to test inplace version, which performs the same operation (but with an alloc)
let group = addgroup!(group, "accumulate")
    # element type matters for scan kernels, so benchmark Float32 and Int64
    # separately; the "L" variants use the long (3×1_000_000) matrices
    let group = addgroup!(group, "Float32")
        group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec)
        group["dims=1"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=1)
        group["dims=2"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=2)

        group["dims=1L"] = @async_benchmarkable accumulate(+, $gpu_mat_long; dims=1)
        group["dims=2L"] = @async_benchmarkable accumulate(+, $gpu_mat_long; dims=2)
    end
    let group = addgroup!(group, "Int64")
        group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec_ints)
        group["dims=1"] = @async_benchmarkable accumulate(+, $gpu_mat_ints; dims=1)
        group["dims=2"] = @async_benchmarkable accumulate(+, $gpu_mat_ints; dims=2)

        group["dims=1L"] = @async_benchmarkable accumulate(+, $gpu_mat_long_ints; dims=1)
        group["dims=2L"] = @async_benchmarkable accumulate(+, $gpu_mat_long_ints; dims=2)
    end
end
let group = addgroup!(group, "reductions")

    let group = addgroup!(group, "reduce")
        # per-eltype sub-groups; the "L" variants reduce the long matrices
        # along each axis to cover both short- and long-dimension kernels
        let group = addgroup!(group, "Float32")
            group["1d"] = @async_benchmarkable reduce(+, $gpu_vec)
            group["dims=1"] = @async_benchmarkable reduce(+, $gpu_mat; dims=1)
            group["dims=2"] = @async_benchmarkable reduce(+, $gpu_mat; dims=2)
            group["dims=1L"] = @async_benchmarkable reduce(+, $gpu_mat_long; dims=1)
            group["dims=2L"] = @async_benchmarkable reduce(+, $gpu_mat_long; dims=2)
        end
        let group = addgroup!(group, "Int64")
            group["1d"] = @async_benchmarkable reduce(+, $gpu_vec_ints)
            group["dims=1"] = @async_benchmarkable reduce(+, $gpu_mat_ints; dims=1)
            group["dims=2"] = @async_benchmarkable reduce(+, $gpu_mat_ints; dims=2)
            group["dims=1L"] = @async_benchmarkable reduce(+, $gpu_mat_long_ints; dims=1)
            group["dims=2L"] = @async_benchmarkable reduce(+, $gpu_mat_long_ints; dims=2)
        end
    end
    # NOTE(review): the `end` matching this "reductions" block lies beyond
    # this chunk, after the mapreduce group below.
let group = addgroup! (group, " mapreduce" )
71
- group[" 1d" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_vec)
72
- group[" 2d" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat; dims= 1 )
101
+ let group = addgroup! (group, " Float32" )
102
+ group[" 1d" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_vec)
103
+ group[" dims=1" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat; dims= 1 )
104
+ group[" dims=2" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat; dims= 2 )
105
+ group[" dims=1L" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_long; dims= 1 )
106
+ group[" dims=2L" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_long; dims= 2 )
107
+ end
108
+ let group = addgroup! (group, " Int64" )
109
+ group[" 1d" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_vec_ints)
110
+ group[" dims=1" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_ints; dims= 1 )
111
+ group[" dims=2" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_ints; dims= 2 )
112
+ group[" dims=1L" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_long_ints; dims= 1 )
113
+ group[" dims=2L" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_long_ints; dims= 2 )
114
+ end
73
115
end
74
116
75
117
# used by sum, prod, minimum, maximum, all, any, count

    # normal distributions are only defined for floating-point types,
    # so there are no Int64 variants here
    let group = addgroup!(group, "randn")
        group["Float32"] = @async_benchmarkable CUDA.randn(Float32, m*n)
    end

    let group = addgroup!(group, "randn!")
        group["Float32"] = @async_benchmarkable CUDA.randn!($gpu_vec)
    end
end