Commit ef5e19b

Merge pull request #121 from JuliaGPU/tb/xunit
Use XUnit.jl for parallel testing.
2 parents 43deaf5 + ce174c6 commit ef5e19b
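
In short, the test suite drops its custom `@test_if` gating macro and adopts XUnit.jl: the innermost `@testset` blocks become `@testcase` blocks, which XUnit can schedule across worker processes. A condensed before/after sketch of the pattern (lifted from the blas.jl change below, bodies elided):

    # before: ad-hoc filtering via a local macro, run serially under Test.jl
    @test_if "blas" @testset "BLAS API" begin
        @testset "(M = $M, N = $N, K = $K)" for M in min_dimension .* [1, 2]
            # ... test body ...
        end
    end

    # after: XUnit.jl leaf test cases, schedulable in parallel
    @testset "BLAS API" begin
        @testcase "(M = $M, N = $N, K = $K)" for M in min_dimension .* [1, 2]
            # ... test body ...
        end
    end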

File tree

8 files changed, +99 -65 lines


.buildkite/pipeline.yml

Lines changed: 8 additions & 1 deletion
@@ -15,7 +15,14 @@ steps:
       julia --project -e '
         # make sure the 1.6-era Manifest works on this Julia version
         using Pkg
-        Pkg.resolve()'
+        Pkg.resolve()
+
+        # work around XUnit.jl bug
+        try
+          Pkg.add(url="https://github.com/maleadt/XUnit.jl", rev="pass_compat")
+        catch err
+          @warn "Could not install patched version of XUnit.jl"
+        end'
     if: build.message !~ /\[skip tests\]/
     timeout_in_minutes: 120
     matrix:
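
For local debugging outside CI, the same pinned branch can be installed by hand; a minimal sketch using the standard Pkg API (it mirrors the pipeline step above, nothing repository-specific is assumed):

    using Pkg
    try
        # same patched revision the pipeline installs
        Pkg.add(url="https://github.com/maleadt/XUnit.jl", rev="pass_compat")
    catch err
        @warn "Could not install patched version of XUnit.jl" exception=err
    end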

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
+test/Manifest.toml

test/Project.toml

Lines changed: 5 additions & 2 deletions
@@ -1,6 +1,9 @@
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+XUnit = "3e3c03f2-1a94-11e9-2981-050a4ca824ab"

test/blas.jl

Lines changed: 2 additions & 2 deletions
@@ -4,12 +4,12 @@ using LinearAlgebra
 
 CUDA.CUBLAS.cublasSetMathMode(CUBLAS.handle(), CUBLAS.CUBLAS_TENSOR_OP_MATH)
 
-@test_if "blas" @testset "BLAS API" begin
+@testset "BLAS API" begin
     @testset "WMMA GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
         transpose_b = [false, true],
         (A_type, B_type, CD_type, min_dimension) in [(Float16, Float16, Float16, 256), (Float16, Float16, Float32, 128)]
 
-        @testset "(M = $M, N = $N, K = $K)" for M in min_dimension .* [1, 2],
+        @testcase "(M = $M, N = $N, K = $K)" for M in min_dimension .* [1, 2],
             N in min_dimension .* [1, 2],
             K in min_dimension .* [1, 2]
 
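Throughout the suite, only the innermost blocks become `@testcase`; the enclosing `@testset` keeps providing structure and reporting. A small self-contained illustration of that split, assuming only XUnit.jl's documented `@testset`/`@testcase`/`@test` macros (not the repository's tests):

    using XUnit

    @testset "Arithmetic" begin
        # each @testcase is an independently schedulable unit of work;
        # under a distributed runner these can execute on different workers
        @testcase "square of $n" for n in 1:4
            @test n^2 == n * n
        end
    end
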
test/matmul.jl

Lines changed: 29 additions & 29 deletions
@@ -6,15 +6,15 @@ using LinearAlgebra
 ################################################################################
 
 @testset "Matmul API" begin
-    @test_if "fpu compute and data types" @testset "FPU GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) OP ($(OP_M), $(OP_N), $(OP_K))" for
+    @testset "FPU GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) OP ($(OP_M), $(OP_N), $(OP_K))" for
         (A_type, B_type, CD_type, min_dimension) in [
             (Float16, Float16, Float32, 128), (Float32, Float32, Float32, 128), (Float32, Float32, Float64, 128), (Float64, Float64, Float64, 128),
-            (Int16, Int16, Int16, 128), (Int32, Int32, Int32, 128), (Int64, Int64, Int64, 128),
-        ],
-        transpose_a = [false, true],
-        transpose_b = [false, true],
+            (Int16, Int16, Int16, 128), (Int32, Int32, Int32, 128), (Int64, Int64, Int64, 128),
+        ],
+        transpose_a = [false, true],
+        transpose_b = [false, true],
         (OP_M, OP_N, OP_K) in [(8, 16, 2)]
-        @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2, 2, 1], [1, 1, 2], [2, 2, 2]], [[2048, 2048, 2048]])
+        @testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2, 2, 1], [1, 1, 2], [2, 2, 2]], [[2048, 2048, 2048]])
             alpha = convert(A_type, 2)
             beta = convert(CD_type, 3)
 
@@ -59,7 +59,7 @@ using LinearAlgebra
             # Transpose outputs, if necessary
             new_a_h = transpose_a ? transpose(a_h) : a_h
             new_b_h = transpose_b ? transpose(b_h) : b_h
-
+
             if A_type <: Integer
                 @test all(isapprox.(alpha * CD_type.(new_a_h) * CD_type.(new_b_h) + beta * c_h, Array(d)))
             else
@@ -68,13 +68,13 @@ using LinearAlgebra
         end
     end
 
-    @test_if "fpu operator shape" @testset "FPU GEMM OPERATOR SHAPE ($(OP_M), $(OP_N), $(OP_K)) (NN, NT, TN, TT)" for (OP_M, OP_N, OP_K) in [
-        (4, 8, 1), (8, 8, 1), (4, 16, 1), (4, 8, 2), (8, 16, 2)
+    @testset "FPU GEMM OPERATOR SHAPE ($(OP_M), $(OP_N), $(OP_K)) (NN, NT, TN, TT)" for (OP_M, OP_N, OP_K) in [
+        (4, 8, 1), (8, 8, 1), (4, 16, 1), (4, 8, 2), (8, 16, 2)
     ]
-        @testset "NN, NT, TN, TT" for (transpose_a, transpose_b) in [(false, false), (false, true), (true, false), (true, true)]
+        @testcase "NN, NT, TN, TT" for (transpose_a, transpose_b) in [(false, false), (false, true), (true, false), (true, true)]
             (M, N, K) = (128, 128, 128)
             (A_type, B_type, CD_type) = (Float32, Float32, Float32)
-
+
             alpha = convert(A_type, 2)
             beta = convert(CD_type, 3)
 
@@ -114,18 +114,18 @@ using LinearAlgebra
             # Transpose outputs, if necessary
             new_a_h = transpose_a ? transpose(a_h) : a_h
             new_b_h = transpose_b ? transpose(b_h) : b_h
-
+
             @test all(isapprox.(alpha * CD_type.(new_a_h) * CD_type.(new_b_h) + beta * c_h, Array(d); rtol = sqrt(eps(A_type))))
         end
     end
 
-    @test_if "tropical fpu" @testset "TROPICAL GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) OP ($(OP_M), $(OP_N), $(OP_K))" for
-        (A_type, B_type, CD_type, min_dimension) in [(Float32, Float32, Float32, 128)],
-        transpose_a = [false, true],
-        transpose_b = [false, true],
+    @testset "TROPICAL GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) OP ($(OP_M), $(OP_N), $(OP_K))" for
+        (A_type, B_type, CD_type, min_dimension) in [(Float32, Float32, Float32, 128)],
+        transpose_a = [false, true],
+        transpose_b = [false, true],
         (OP_M, OP_N, OP_K) in [(8, 16, 2)]
 
-        @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2, 2, 1], [1, 1, 2], [2, 2, 2]])
+        @testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2, 2, 1], [1, 1, 2], [2, 2, 2]])
             a_h = rand(A_type, (M, K)) / sqrt(A_type(K))
             b_h = rand(B_type, (K, N)) / sqrt(B_type(K))
             c_h = rand(CD_type, (M, N))
@@ -135,7 +135,7 @@ using LinearAlgebra
             for j in 1 : N
                 d_h[i, j] = c_h[i, j]
                 for k in 1 : K
-                    d_h[i, j] = max(a_h[i, k] + b_h[k, j], d_h[i, j])
+                    d_h[i, j] = max(a_h[i, k] + b_h[k, j], d_h[i, j])
                 end
             end
         end
@@ -164,16 +164,16 @@ using LinearAlgebra
             )
 
             GemmKernels.matmul(a, b, c, d, conf; kernel = Kernel.matmul_pipelined)
-
+
             @test all(isapprox.(d_h, Array(d); rtol = sqrt(eps(A_type))))
         end
     end
 
 
-    @test_if "wmma" @testset "WMMA GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
+    @testset "WMMA GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
         transpose_b = [false, true],
         (A_type, B_type, CD_type, min_dimension) in [(Float16, Float16, Float16, 256), (Float16, Float16, Float32, 128)]
-        @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2,2,1], [1,1,2], [2,2,2]], [[2048, 2048, 2048]])
+        @testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2,2,1], [1,1,2], [2,2,2]], [[2048, 2048, 2048]])
             alpha = convert(A_type, 2)
             beta = convert(CD_type, 3)
 
@@ -217,10 +217,10 @@ using LinearAlgebra
         end
     end
 
-    @test_if "bias" @testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) + bias" for transpose_a = [false, true],
+    @testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) + bias" for transpose_a = [false, true],
         transpose_b = [false, true]
 
-        @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
+        @testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
             a_h = rand(Float16, (M, K)) / sqrt(Float16(K))
             b_h = rand(Float16, (K, N)) / sqrt(Float16(K))
             c_h = rand(Float32, (M, N))
@@ -268,8 +268,8 @@ using LinearAlgebra
         end
     end
 
-    @test_if "diagonal" @testset "WMMA GEMM (A = diagonal, B = $( !transpose_b ? 'N' : 'T' ))" for transpose_b = [false, true]
-        @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
+    @testset "WMMA GEMM (A = diagonal, B = $( !transpose_b ? 'N' : 'T' ))" for transpose_b = [false, true]
+        @testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
             @assert M == K "Diagonal only supports square A matrix (M == K)"
 
             transpose_a = false
@@ -312,10 +312,10 @@ using LinearAlgebra
         end
     end
 
-    @test_if "complex" @testset "WMMA Complex GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
+    @testset "WMMA Complex GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
         transpose_b = [false, true]
 
-        @testset "(M = $M, N = $N, K = $K)" for (M, N, K) = [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
+        @testcase "(M = $M, N = $N, K = $K)" for (M, N, K) = [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
            a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K));
           b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K));
           c_h = rand(Complex{Float32}, (M, N));
@@ -377,8 +377,8 @@ using LinearAlgebra
         end
     end
 
-    @test_if "dual" @testset "WMMA Dual GEMM" begin
-        @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
+    @testset "WMMA Dual GEMM" begin
+        @testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
            a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K));
           b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K));
           c_h = rand(Complex{Float32}, (M, N));
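
The TROPICAL GEMM cases above check the kernel against a max-plus reference computed on the CPU. A short, self-contained sketch of that reference (hypothetical function name; the loop mirrors the one in the diff):

    # d[i, j] = max over k of (a[i, k] + b[k, j]), seeded with c[i, j]
    function tropical_matmul_reference(a, b, c)
        M, K = size(a)
        N = size(b, 2)
        d = copy(c)
        for i in 1:M, j in 1:N, k in 1:K
            d[i, j] = max(a[i, k] + b[k, j], d[i, j])
        end
        return d
    end

    a = rand(Float32, 128, 64); b = rand(Float32, 64, 96); c = rand(Float32, 128, 96)
    d = tropical_matmul_reference(a, b, c)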

test/runtests.jl

Lines changed: 20 additions & 21 deletions
@@ -1,26 +1,25 @@
-using GemmKernels
-using Test
 
-import CUDA
-import InteractiveUtils
+using Distributed
 
-@info "Julia details\n\n" * sprint(io->InteractiveUtils.versioninfo(io))
-@info "CUDA details\n\n" * sprint(io->CUDA.versioninfo(io))
+# determine parallelism
+cpu_jobs = Sys.CPU_THREADS
+memory_jobs = Int(Sys.free_memory()) ÷ (2 * 2^30)
+jobs = min(cpu_jobs, memory_jobs)
+@info "Running $jobs tests in parallel. If this is too many, set the `JULIA_CPU_THREADS` environment variable."
 
-macro test_if(label, expr)
-    return quote
-        if isempty(ARGS) || $(label) in ARGS
-            $(esc(expr))
-        else
-            nothing
-        end
-    end
+# add workers
+exeflags = Base.julia_cmd()
+filter!(exeflags.exec) do c
+    return !(startswith(c, "--depwarn") || startswith(c, "--check-bounds"))
 end
-
-CUDA.allowscalar(false)
-
-@testset "GemmKernels.jl" begin
-    include("tiling.jl")
-    include("matmul.jl")
-    include("blas.jl")
+push!(exeflags.exec, "--check-bounds=yes")
+push!(exeflags.exec, "--startup-file=no")
+push!(exeflags.exec, "--depwarn=yes")
+push!(exeflags.exec, "--project=$(Base.active_project())")
+exename = popfirst!(exeflags.exec)
+withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do
+    addprocs(jobs; exename, exeflags)
 end
+
+@everywhere using XUnit
+runtests("tests.jl")
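
The worker count is the smaller of the CPU-thread count and a memory bound of one worker per 2 GiB of free RAM. A worked example with assumed machine numbers (purely illustrative):

    cpu_jobs    = 16                          # e.g. Sys.CPU_THREADS on a 16-thread machine
    free_bytes  = 24 * 2^30                   # e.g. Int(Sys.free_memory()), here 24 GiB
    memory_jobs = free_bytes ÷ (2 * 2^30)     # -> 12
    jobs        = min(cpu_jobs, memory_jobs)  # -> 12 worker processes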

test/tests.jl

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+using Distributed, XUnit, Dates
+using CUDA, GemmKernels
+CUDA.allowscalar(false)
+
+if myid() == 1
+    using InteractiveUtils
+    @info "Julia details:\n" * sprint(io->InteractiveUtils.versioninfo(io))
+    @info "CUDA details:\n" * sprint(io->CUDA.versioninfo(io))
+end
+
+t0 = now()
+try
+    @testset runner=DistributedTestRunner() "GemmKernels.jl" begin
+        include("tiling.jl")
+        include("matmul.jl")
+        include("blas.jl")
+    end
+finally
+    if myid() == 1
+        t1 = now()
+        elapsed = canonicalize(Dates.CompoundPeriod(t1-t0))
+        println("Testing finished in $elapsed")
+    end
+end
+
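
The elapsed-time report at the end relies on the Dates standard library: subtracting two DateTimes yields a Millisecond period, and canonicalize renders it as a readable compound period. A tiny illustration with made-up timestamps:

    using Dates
    t0 = DateTime(2021, 6, 1, 12, 0, 0)
    t1 = t0 + Hour(1) + Minute(35) + Second(30)
    canonicalize(Dates.CompoundPeriod(t1 - t0))   # 1 hour, 35 minutes, 30 seconds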

test/tiling.jl

Lines changed: 9 additions & 9 deletions
@@ -2,39 +2,39 @@ using GemmKernels.Tiling
 
 ################################################################################
 
-@test_if "tiling" @testset "Tiling API" begin
+@testset "Tiling API" begin
     @testset "Tiles" begin
-        @testset "Index" begin
+        @testcase "Index" begin
             @test Tile(M = 4, N = 4, K = 4).index == (M = 0, N = 0, K = 0)
         end
 
-        @testset "Projection" begin
+        @testcase "Projection" begin
             @test Tile(M = 1, N = 2, K = 3).MN == Tile(M = 1, N = 2)
             @test Tile(M = 1, N = 2, K = 3).NM == Tile(N = 2, M = 1)
             @test Tile(M = 1, N = 2, K = 3).M == Tile(M = 1)
             @test Tile(M = 1, N = 2, K = 3).KMN == Tile(K = 3, M = 1, N = 2)
         end
 
-        @testset "Transposition" begin
+        @testcase "Transposition" begin
             @test transpose(Tile(M = 1, N = 2)) == Tile(N = 2, M = 1)
             @test transpose(Tile(M = 1, N = 2, K = 3)) == Tile(K = 3, N = 2, M = 1)
         end
 
-        @testset "Translate base" begin
+        @testcase "Translate base" begin
             tile = translate_base(Tile(M = 10, N = 20), (M = 1, N = 2))
             @test tile.size == (M = 10, N = 20)
             @test tile.base == (M = 1, N = 2)
             @test tile.offset == (M = 0, N = 0)
         end
 
-        @testset "Translate offset" begin
+        @testcase "Translate offset" begin
             tile = translate_offset(Tile(M = 10, N = 20), (M = 1, N = 2))
             @test tile.size == (M = 10, N = 20)
             @test tile.base == (M = 0, N = 0)
             @test tile.offset == (M = 1, N = 2)
         end
 
-        @testset "Linearise" begin
+        @testcase "Linearise" begin
             tile = Tile(M = 3, N = 5)
             for i = 0 : 2, j = 0 : 4
                 tile_t = translate_offset(tile, (M = i, N = j))
@@ -45,7 +45,7 @@ using GemmKernels.Tiling
         end
 
     @testset "Tile iteration" begin
-        @testset "Subdivide" begin
+        @testcase "Subdivide" begin
             tile_size = (M = 8, N = 4)
             num_tiles = (M = 2, N = 4)
             tile = Tile(M = num_tiles.M * tile_size.M, N = num_tiles.N * tile_size.N)
@@ -59,7 +59,7 @@ using GemmKernels.Tiling
            end
        end
 
-        @testset "Parallellise" begin
+        @testcase "Parallellise" begin
             tile_size = (M = 8, N = 4)
             num_tiles = (M = 2, N = 8)
             tile = Tile(M = num_tiles.M * tile_size.M, N = num_tiles.N * tile_size.N)
