| 
 | 1 | +group = addgroup!(SUITE, "array")  | 
 | 2 | + | 
 | 3 | +const m = 512  | 
 | 4 | +const n = 1000  | 
 | 5 | + | 
 | 6 | +# generate some arrays  | 
 | 7 | +cpu_mat = rand(rng, Float32, m, n)  | 
 | 8 | +gpu_mat = MtlArray{Float32}(undef, size(cpu_mat))  | 
 | 9 | +gpu_vec = reshape(gpu_mat, length(gpu_mat))  | 
 | 10 | +gpu_arr_3d = reshape(gpu_mat, (m, 40, 25))  | 
 | 11 | +gpu_arr_4d = reshape(gpu_mat, (m, 10, 10, 10))  | 
 | 12 | +gpu_mat_ints = MtlArray(rand(rng, Int, m, n))  | 
 | 13 | +gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))  | 
 | 14 | +gpu_mat_bools = MtlArray(rand(rng, Bool, m, n))  | 
 | 15 | +gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))  | 
 | 16 | + | 
 | 17 | +group["construct"] = @benchmarkable MtlArray{Int}(undef, 1)  | 
 | 18 | + | 
 | 19 | +group["copy"] = @async_benchmarkable copy($gpu_mat)  | 
 | 20 | + | 
 | 21 | +gpu_mat2 = copy(gpu_mat)  | 
 | 22 | +let group = addgroup!(group, "copyto!")  | 
 | 23 | +    group["cpu_to_gpu"] = @async_benchmarkable copyto!($gpu_mat, $cpu_mat)  | 
 | 24 | +    group["gpu_to_cpu"] = @async_benchmarkable copyto!($cpu_mat, $gpu_mat)  | 
 | 25 | +    group["gpu_to_gpu"] = @async_benchmarkable copyto!($gpu_mat2, $gpu_mat)  | 
 | 26 | +end  | 
 | 27 | + | 
 | 28 | +let group = addgroup!(group, "iteration")  | 
 | 29 | +    group["scalar"] = @benchmarkable Metal.@allowscalar [$gpu_vec[i] for i in 1:10]  | 
 | 30 | + | 
 | 31 | +    group["logical"] = @benchmarkable $gpu_vec[$gpu_vec_bools]  | 
 | 32 | + | 
 | 33 | +    let group = addgroup!(group, "findall")  | 
 | 34 | +        group["bool"] = @benchmarkable findall($gpu_vec_bools)  | 
 | 35 | +        group["int"] = @benchmarkable findall(isodd, $gpu_vec_ints)  | 
 | 36 | +    end  | 
 | 37 | + | 
 | 38 | +    let group = addgroup!(group, "findfirst")  | 
 | 39 | +        group["bool"] = @benchmarkable findfirst($gpu_vec_bools)  | 
 | 40 | +        group["int"] = @benchmarkable findfirst(isodd, $gpu_vec_ints)  | 
 | 41 | +    end  | 
 | 42 | + | 
 | 43 | +    let group = addgroup!(group, "findmin") # findmax  | 
 | 44 | +        group["1d"] = @async_benchmarkable findmin($gpu_vec)  | 
 | 45 | +        group["2d"] = @async_benchmarkable findmin($gpu_mat; dims=1)  | 
 | 46 | +    end  | 
 | 47 | +end  | 
 | 48 | + | 
 | 49 | +# let group = addgroup!(group, "reverse")  | 
 | 50 | +#     group["1d"] = @async_benchmarkable reverse($gpu_vec)  | 
 | 51 | +#     group["2d"] = @async_benchmarkable reverse($gpu_mat; dims=1)  | 
 | 52 | +#     group["1d_inplace"] = @async_benchmarkable reverse!($gpu_vec)  | 
 | 53 | +#     group["2d_inplace"] = @async_benchmarkable reverse!($gpu_mat; dims=1)  | 
 | 54 | +# end  | 
 | 55 | + | 
 | 56 | +group["broadcast"] = @async_benchmarkable $gpu_mat .= 0f0  | 
 | 57 | + | 
 | 58 | +# no need to test inplace version, which performs the same operation (but with an alloc)  | 
 | 59 | +let group = addgroup!(group, "accumulate")  | 
 | 60 | +    group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec)  | 
 | 61 | +    group["2d"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=1)  | 
 | 62 | +end  | 
 | 63 | + | 
 | 64 | +let group = addgroup!(group, "reductions")  | 
 | 65 | +    let group = addgroup!(group, "reduce")  | 
 | 66 | +        group["1d"] = @async_benchmarkable reduce(+, $gpu_vec)  | 
 | 67 | +        group["2d"] = @async_benchmarkable reduce(+, $gpu_mat; dims=1)  | 
 | 68 | +    end  | 
 | 69 | + | 
 | 70 | +    let group = addgroup!(group, "mapreduce")  | 
 | 71 | +        group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec)  | 
 | 72 | +        group["2d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=1)  | 
 | 73 | +    end  | 
 | 74 | + | 
 | 75 | +    # used by sum, prod, minimum, maximum, all, any, count  | 
 | 76 | +end  | 
 | 77 | + | 
 | 78 | +let group = addgroup!(group, "random")  | 
 | 79 | +    let group = addgroup!(group, "rand")  | 
 | 80 | +        group["Float32"] = @async_benchmarkable Metal.rand(Float32, m*n)  | 
 | 81 | +        group["Int64"] = @async_benchmarkable Metal.rand(Int64, m*n)  | 
 | 82 | +    end  | 
 | 83 | + | 
 | 84 | +    let group = addgroup!(group, "rand!")  | 
 | 85 | +        group["Float32"] = @async_benchmarkable Metal.rand!($gpu_vec)  | 
 | 86 | +        group["Int64"] = @async_benchmarkable Metal.rand!($gpu_vec_ints)  | 
 | 87 | +    end  | 
 | 88 | + | 
 | 89 | +    let group = addgroup!(group, "randn")  | 
 | 90 | +        group["Float32"] = @async_benchmarkable Metal.randn(Float32, m*n)  | 
 | 91 | +        # group["Int64"] = @async_benchmarkable Metal.randn(Int64, m*n)  | 
 | 92 | +    end  | 
 | 93 | + | 
 | 94 | +    let group = addgroup!(group, "randn!")  | 
 | 95 | +        group["Float32"] = @async_benchmarkable Metal.randn!($gpu_vec)  | 
 | 96 | +        # group["Int64"] = @async_benchmarkable Metal.randn!($gpu_vec_ints)  | 
 | 97 | +    end  | 
 | 98 | +end  | 
 | 99 | + | 
 | 100 | +# let group = addgroup!(group, "sorting")  | 
 | 101 | +#     group["1d"] = @async_benchmarkable sort($gpu_vec)  | 
 | 102 | +#     group["2d"] = @async_benchmarkable sort($gpu_mat; dims=1)  | 
 | 103 | +#     group["by"] = @async_benchmarkable sort($gpu_vec; by=sin)  | 
 | 104 | +# end  | 
 | 105 | + | 
 | 106 | +let group = addgroup!(group, "permutedims")  | 
 | 107 | +    group["2d"] = @async_benchmarkable permutedims($gpu_mat, (2,1))  | 
 | 108 | +    group["3d"] = @async_benchmarkable permutedims($gpu_arr_3d, (3,1,2))  | 
 | 109 | +    group["4d"] = @async_benchmarkable permutedims($gpu_arr_4d, (2,1,4,3))  | 
 | 110 | +end  | 
0 commit comments