Skip to content

Commit b8fb812

Browse files
More accumulation and reduction benchmarks (#614)
* Test expanded benchmarking * Increase benchmark timeout * Only run some benchmarks once
1 parent 7c2b820 commit b8fb812

File tree

2 files changed

+130
-84
lines changed

2 files changed

+130
-84
lines changed

.buildkite/pipeline.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,4 +151,4 @@ steps:
151151
build.message =~ /\[only benchmarks\]/ ||
152152
build.message !~ /\[only/ && !build.pull_request.draft &&
153153
build.message !~ /\[skip benchmarks\]/
154-
timeout_in_minutes: 30
154+
timeout_in_minutes: 45

perf/array.jl

Lines changed: 129 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,158 @@
11
const m = 512
22
const n = 1000
3+
const m_long = 3
4+
const n_long = 1_000_000
5+
6+
group = addgroup!(SUITE, "array")
37

48
for (S, smname) in [(Metal.PrivateStorage,"private"), (Metal.SharedStorage,"shared")]
5-
local group = addgroup!(SUITE, "$smname array")
6-
7-
# generate some arrays
8-
cpu_mat = rand(rng, Float32, m, n)
9-
gpu_mat = MtlMatrix{Float32,S}(undef, size(cpu_mat))
10-
gpu_vec = reshape(gpu_mat, length(gpu_mat))
11-
gpu_arr_3d = reshape(gpu_mat, (m, 40, 25))
12-
gpu_arr_4d = reshape(gpu_mat, (m, 10, 10, 10))
13-
gpu_mat_ints = MtlMatrix{Int,S}(rand(rng, Int, m, n))
14-
gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
15-
gpu_mat_bools = MtlMatrix{Bool,S}(rand(rng, Bool, m, n))
16-
gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))
17-
18-
group["construct"] = @benchmarkable MtlArray{Int,1,$S}(undef, 1)
19-
20-
group["copy"] = @benchmarkable Metal.@sync copy($gpu_mat)
21-
22-
gpu_mat2 = copy(gpu_mat)
23-
let group = addgroup!(group, "copyto!")
24-
group["cpu_to_gpu"] = @benchmarkable Metal.@sync copyto!($gpu_mat, $cpu_mat)
25-
group["gpu_to_cpu"] = @benchmarkable Metal.@sync copyto!($cpu_mat, $gpu_mat)
26-
group["gpu_to_gpu"] = @benchmarkable Metal.@sync copyto!($gpu_mat2, $gpu_mat)
27-
end
9+
let group = addgroup!(group, smname)
10+
11+
# generate some arrays
12+
cpu_mat = rand(rng, Float32, m, n)
13+
gpu_mat = MtlMatrix{Float32,S}(cpu_mat)
14+
gpu_vec = reshape(gpu_mat, length(gpu_mat))
15+
gpu_mat_ints = MtlMatrix{Int64,S}(rand(rng, -10:10, m, n))
16+
gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
17+
gpu_mat_bools = MtlMatrix{Bool,S}(rand(rng, Bool, m, n))
18+
gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))
19+
20+
group["copy"] = @benchmarkable Metal.@sync copy($gpu_mat)
21+
22+
gpu_mat2 = copy(gpu_mat)
23+
let group = addgroup!(group, "copyto!")
24+
group["cpu_to_gpu"] = @benchmarkable Metal.@sync copyto!($gpu_mat, $cpu_mat)
25+
group["gpu_to_cpu"] = @benchmarkable Metal.@sync copyto!($cpu_mat, $gpu_mat)
26+
group["gpu_to_gpu"] = @benchmarkable Metal.@sync copyto!($gpu_mat2, $gpu_mat)
27+
end
2828

29-
let group = addgroup!(group, "iteration")
30-
group["scalar"] = @benchmarkable Metal.@allowscalar [$gpu_vec[i] for i in 1:10]
29+
let group = addgroup!(group, "iteration")
30+
group["scalar"] = @benchmarkable Metal.@allowscalar [$gpu_vec[i] for i in 1:10]
3131

32-
group["logical"] = @benchmarkable $gpu_vec[$gpu_vec_bools]
32+
group["logical"] = @benchmarkable $gpu_vec[$gpu_vec_bools]
3333

34-
let group = addgroup!(group, "findall")
35-
group["bool"] = @benchmarkable findall($gpu_vec_bools)
36-
group["int"] = @benchmarkable findall(isodd, $gpu_vec_ints)
37-
end
34+
let group = addgroup!(group, "findall")
35+
group["bool"] = @benchmarkable findall($gpu_vec_bools)
36+
group["int"] = @benchmarkable findall(isodd, $gpu_vec_ints)
37+
end
3838

39-
let group = addgroup!(group, "findfirst")
40-
group["bool"] = @benchmarkable findfirst($gpu_vec_bools)
41-
group["int"] = @benchmarkable findfirst(isodd, $gpu_vec_ints)
42-
end
39+
let group = addgroup!(group, "findfirst")
40+
group["bool"] = @benchmarkable findfirst($gpu_vec_bools)
41+
group["int"] = @benchmarkable findfirst(isodd, $gpu_vec_ints)
42+
end
4343

44-
let group = addgroup!(group, "findmin") # findmax
45-
group["1d"] = @benchmarkable Metal.@sync findmin($gpu_vec)
46-
group["2d"] = @benchmarkable Metal.@sync findmin($gpu_mat; dims=1)
44+
let group = addgroup!(group, "findmin") # findmax
45+
group["1d"] = @benchmarkable Metal.@sync findmin($gpu_vec)
46+
group["2d"] = @benchmarkable Metal.@sync findmin($gpu_mat; dims=1)
47+
end
4748
end
4849
end
50+
end
4951

50-
# let group = addgroup!(group, "reverse")
51-
# group["1d"] = @benchmarkable Metal.@sync reverse($gpu_vec)
52-
# group["2d"] = @benchmarkable Metal.@sync reverse($gpu_mat; dims=1)
53-
# group["1d_inplace"] = @benchmarkable Metal.@sync reverse!($gpu_vec)
54-
# group["2d_inplace"] = @benchmarkable Metal.@sync reverse!($gpu_mat; dims=1)
55-
# end
56-
57-
group["broadcast"] = @benchmarkable Metal.@sync $gpu_mat .= 0f0
58-
59-
# no need to test inplace version, which performs the same operation (but with an alloc)
60-
let group = addgroup!(group, "accumulate")
52+
cpu_mat = rand(rng, Float32, m, n)
53+
gpu_mat = MtlMatrix{Float32}(cpu_mat)
54+
gpu_mat_long = MtlMatrix{Float32}(rand(rng, Float32, m_long, n_long))
55+
gpu_vec = reshape(gpu_mat, length(gpu_mat))
56+
gpu_arr_3d = reshape(gpu_mat, (m, 40, 25))
57+
gpu_arr_4d = reshape(gpu_mat, (m, 10, 10, 10))
58+
gpu_mat_ints = MtlMatrix{Int64}(rand(rng, -10:10, m, n))
59+
gpu_mat_long_ints = MtlMatrix{Int64}(rand(rng, -10:10, m_long, n_long))
60+
gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
61+
62+
# let group = addgroup!(group, "reverse")
63+
# group["1d"] = @benchmarkable Metal.@sync reverse($gpu_vec)
64+
# group["2d"] = @benchmarkable Metal.@sync reverse($gpu_mat; dims=1)
65+
# group["1d_inplace"] = @benchmarkable Metal.@sync reverse!($gpu_vec)
66+
# group["2d_inplace"] = @benchmarkable Metal.@sync reverse!($gpu_mat; dims=1)
67+
# end
68+
group["construct"] = @benchmarkable MtlArray{Int,1}(undef, 1)
69+
70+
group["broadcast"] = @benchmarkable Metal.@sync $gpu_mat .= 0f0
71+
72+
# no need to test inplace version, which performs the same operation (but with an alloc)
73+
let group = addgroup!(group, "accumulate")
74+
let group = addgroup!(group, "Float32")
6175
group["1d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_vec)
62-
group["2d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat; dims=1)
76+
group["dims=1"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat; dims=1)
77+
group["dims=2"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat; dims=2)
78+
group["dims=1L"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat_long; dims=1)
79+
group["dims=2L"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat_long; dims=2)
80+
end
81+
let group = addgroup!(group, "Int64")
82+
group["1d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_vec_ints)
83+
group["dims=1"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat_ints; dims=1)
84+
group["dims=2"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat_ints; dims=2)
85+
group["dims=1L"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat_long_ints; dims=1)
86+
group["dims=2L"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat_long_ints; dims=2)
6387
end
88+
end
6489

65-
let group = addgroup!(group, "reductions")
66-
let group = addgroup!(group, "reduce")
90+
let group = addgroup!(group, "reductions")
91+
let group = addgroup!(group, "reduce")
92+
let group = addgroup!(group, "Float32")
6793
group["1d"] = @benchmarkable Metal.@sync reduce(+, $gpu_vec)
68-
group["2d"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat; dims=1)
94+
group["dims=1"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat; dims=1)
95+
group["dims=2"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat; dims=2)
96+
group["dims=1L"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat_long; dims=1)
97+
group["dims=2L"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat_long; dims=2)
6998
end
70-
71-
let group = addgroup!(group, "mapreduce")
72-
group["1d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_vec)
73-
group["2d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat; dims=1)
99+
let group = addgroup!(group, "Int64")
100+
group["1d"] = @benchmarkable Metal.@sync reduce(+, $gpu_vec_ints)
101+
group["dims=1"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat_ints; dims=1)
102+
group["dims=2"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat_ints; dims=2)
103+
group["dims=1L"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat_long_ints; dims=1)
104+
group["dims=2L"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat_long_ints; dims=2)
74105
end
75-
76-
# used by sum, prod, minimum, maximum, all, any, count
77106
end
78107

79-
let group = addgroup!(group, "random")
80-
let group = addgroup!(group, "rand")
81-
group["Float32"] = @benchmarkable Metal.@sync Metal.rand(Float32, m*n)
82-
group["Int64"] = @benchmarkable Metal.@sync Metal.rand(Int64, m*n)
108+
let group = addgroup!(group, "mapreduce")
109+
let group = addgroup!(group, "Float32")
110+
group["1d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_vec)
111+
group["dims=1"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat; dims=1)
112+
group["dims=2"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat; dims=2)
113+
group["dims=1L"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat_long; dims=1)
114+
group["dims=2L"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat_long; dims=2)
83115
end
84-
85-
let group = addgroup!(group, "rand!")
86-
group["Float32"] = @benchmarkable Metal.@sync Metal.rand!($gpu_vec)
87-
group["Int64"] = @benchmarkable Metal.@sync Metal.rand!($gpu_vec_ints)
116+
let group = addgroup!(group, "Int64")
117+
group["1d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_vec_ints)
118+
group["dims=1"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat_ints; dims=1)
119+
group["dims=2"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat_ints; dims=2)
120+
group["dims=1L"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat_long_ints; dims=1)
121+
group["dims=2L"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat_long_ints; dims=2)
88122
end
123+
end
89124

90-
let group = addgroup!(group, "randn")
91-
group["Float32"] = @benchmarkable Metal.@sync Metal.randn(Float32, m*n)
92-
# group["Int64"] = @benchmarkable Metal.@sync Metal.randn(Int64, m*n)
93-
end
125+
# used by sum, prod, minimum, maximum, all, any, count
126+
end
94127

95-
let group = addgroup!(group, "randn!")
96-
group["Float32"] = @benchmarkable Metal.@sync Metal.randn!($gpu_vec)
97-
# group["Int64"] = @benchmarkable Metal.@sync Metal.randn!($gpu_vec_ints)
98-
end
128+
let group = addgroup!(group, "random")
129+
let group = addgroup!(group, "rand")
130+
group["Float32"] = @benchmarkable Metal.@sync Metal.rand(Float32, m*n)
131+
group["Int64"] = @benchmarkable Metal.@sync Metal.rand(Int64, m*n)
99132
end
100133

101-
# let group = addgroup!(group, "sorting")
102-
# group["1d"] = @benchmarkable Metal.@sync sort($gpu_vec)
103-
# group["2d"] = @benchmarkable Metal.@sync sort($gpu_mat; dims=1)
104-
# group["by"] = @benchmarkable Metal.@sync sort($gpu_vec; by=sin)
105-
# end
134+
let group = addgroup!(group, "rand!")
135+
group["Float32"] = @benchmarkable Metal.@sync Metal.rand!($gpu_vec)
136+
group["Int64"] = @benchmarkable Metal.@sync Metal.rand!($gpu_vec_ints)
137+
end
138+
139+
let group = addgroup!(group, "randn")
140+
group["Float32"] = @benchmarkable Metal.@sync Metal.randn(Float32, m*n)
141+
end
106142

107-
let group = addgroup!(group, "permutedims")
108-
group["2d"] = @benchmarkable Metal.@sync permutedims($gpu_mat, (2,1))
109-
group["3d"] = @benchmarkable Metal.@sync permutedims($gpu_arr_3d, (3,1,2))
110-
group["4d"] = @benchmarkable Metal.@sync permutedims($gpu_arr_4d, (2,1,4,3))
143+
let group = addgroup!(group, "randn!")
144+
group["Float32"] = @benchmarkable Metal.@sync Metal.randn!($gpu_vec)
111145
end
112146
end
147+
148+
# let group = addgroup!(group, "sorting")
149+
# group["1d"] = @benchmarkable Metal.@sync sort($gpu_vec)
150+
# group["2d"] = @benchmarkable Metal.@sync sort($gpu_mat; dims=1)
151+
# group["by"] = @benchmarkable Metal.@sync sort($gpu_vec; by=sin)
152+
# end
153+
154+
let group = addgroup!(group, "permutedims")
155+
group["2d"] = @benchmarkable Metal.@sync permutedims($gpu_mat, (2,1))
156+
group["3d"] = @benchmarkable Metal.@sync permutedims($gpu_arr_3d, (3,1,2))
157+
group["4d"] = @benchmarkable Metal.@sync permutedims($gpu_arr_4d, (2,1,4,3))
158+
end

0 commit comments

Comments
 (0)