Skip to content

Commit 822437c

Browse files
authored
Merge branch 'master' into bugfix_0x0_sparse_conversion
2 parents f565385 + fb0a528 commit 822437c

File tree

2 files changed

+340
-601
lines changed

2 files changed

+340
-601
lines changed

perf/array.jl

Lines changed: 50 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,18 @@ group = addgroup!(SUITE, "array")
22

33
const m = 512
44
const n = 1000
5+
const m_long = 3
6+
const n_long = 1_000_000
57

68
# generate some arrays
79
cpu_mat = rand(rng, Float32, m, n)
8-
gpu_mat = CuArray{Float32}(undef, size(cpu_mat))
10+
gpu_mat = CuArray{Float32}(cpu_mat)
11+
gpu_mat_long = CuArray{Float32}(rand(rng, Float32, m_long, n_long))
912
gpu_vec = reshape(gpu_mat, length(gpu_mat))
1013
gpu_arr_3d = reshape(gpu_mat, (m, 40, 25))
1114
gpu_arr_4d = reshape(gpu_mat, (m, 10, 10, 10))
12-
gpu_mat_ints = CuArray(rand(rng, Int, m, n))
15+
gpu_mat_ints = CuArray(rand(rng, -10:10, m, n))
16+
gpu_mat_long_ints = CuArray(rand(rng, -10:10, m_long, n_long))
1317
gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
1418
gpu_mat_bools = CuArray(rand(rng, Bool, m, n))
1519
gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))
@@ -57,19 +61,57 @@ group["broadcast"] = @async_benchmarkable $gpu_mat .= 0f0
5761

5862
# no need to test inplace version, which performs the same operation (but with an alloc)
5963
let group = addgroup!(group, "accumulate")
60-
group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec)
61-
group["2d"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=1)
64+
let group = addgroup!(group, "Float32")
65+
group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec)
66+
group["dims=1"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=1)
67+
group["dims=2"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=2)
68+
69+
group["dims=1L"] = @async_benchmarkable accumulate(+, $gpu_mat_long; dims=1)
70+
group["dims=2L"] = @async_benchmarkable accumulate(+, $gpu_mat_long; dims=2)
71+
end
72+
let group = addgroup!(group, "Int64")
73+
group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec_ints)
74+
group["dims=1"] = @async_benchmarkable accumulate(+, $gpu_mat_ints; dims=1)
75+
group["dims=2"] = @async_benchmarkable accumulate(+, $gpu_mat_ints; dims=2)
76+
77+
group["dims=1L"] = @async_benchmarkable accumulate(+, $gpu_mat_long_ints; dims=1)
78+
group["dims=2L"] = @async_benchmarkable accumulate(+, $gpu_mat_long_ints; dims=2)
79+
end
6280
end
6381

6482
let group = addgroup!(group, "reductions")
6583
let group = addgroup!(group, "reduce")
66-
group["1d"] = @async_benchmarkable reduce(+, $gpu_vec)
67-
group["2d"] = @async_benchmarkable reduce(+, $gpu_mat; dims=1)
84+
let group = addgroup!(group, "Float32")
85+
group["1d"] = @async_benchmarkable reduce(+, $gpu_vec)
86+
group["dims=1"] = @async_benchmarkable reduce(+, $gpu_mat; dims=1)
87+
group["dims=2"] = @async_benchmarkable reduce(+, $gpu_mat; dims=2)
88+
group["dims=1L"] = @async_benchmarkable reduce(+, $gpu_mat_long; dims=1)
89+
group["dims=2L"] = @async_benchmarkable reduce(+, $gpu_mat_long; dims=2)
90+
end
91+
let group = addgroup!(group, "Int64")
92+
group["1d"] = @async_benchmarkable reduce(+, $gpu_vec_ints)
93+
group["dims=1"] = @async_benchmarkable reduce(+, $gpu_mat_ints; dims=1)
94+
group["dims=2"] = @async_benchmarkable reduce(+, $gpu_mat_ints; dims=2)
95+
group["dims=1L"] = @async_benchmarkable reduce(+, $gpu_mat_long_ints; dims=1)
96+
group["dims=2L"] = @async_benchmarkable reduce(+, $gpu_mat_long_ints; dims=2)
97+
end
6898
end
6999

70100
let group = addgroup!(group, "mapreduce")
71-
group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec)
72-
group["2d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=1)
101+
let group = addgroup!(group, "Float32")
102+
group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec)
103+
group["dims=1"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=1)
104+
group["dims=2"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=2)
105+
group["dims=1L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long; dims=1)
106+
group["dims=2L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long; dims=2)
107+
end
108+
let group = addgroup!(group, "Int64")
109+
group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec_ints)
110+
group["dims=1"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_ints; dims=1)
111+
group["dims=2"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_ints; dims=2)
112+
group["dims=1L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long_ints; dims=1)
113+
group["dims=2L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long_ints; dims=2)
114+
end
73115
end
74116

75117
# used by sum, prod, minimum, maximum, all, any, count
@@ -88,12 +130,10 @@ let group = addgroup!(group, "random")
88130

89131
let group = addgroup!(group, "randn")
90132
group["Float32"] = @async_benchmarkable CUDA.randn(Float32, m*n)
91-
#group["Int64"] = @async_benchmarkable CUDA.randn(Int64, m*n)
92133
end
93134

94135
let group = addgroup!(group, "randn!")
95136
group["Float32"] = @async_benchmarkable CUDA.randn!($gpu_vec)
96-
#group["Int64"] = @async_benchmarkable CUDA.randn!($gpu_vec_ints)
97137
end
98138
end
99139

0 commit comments

Comments (0)