@@ -1,9 +1,9 @@
- using Flux, Test, LinearAlgebra, Random, Statistics
- using CUDA, CUDAKernels, LoopVectorization
+ using Flux, Functors, Test, LinearAlgebra, Random, Statistics
+ using CUDA, CUDAKernels, KernelAbstractions, LoopVectorization
using Tullio
using NeuralAttentionlib
using BenchmarkTools
-
+ CUDA.allowscalar(false)
const A3{T} = AbstractArray{T,3}

"""
@@ -144,11 +144,6 @@ function perf(dim, len, batch_size, num_heads)
mha = MultiHeadAttention(dim, num_heads)
x = rand(Float32, (dim, len, batch_size))

- y = mha(x, x, x)
- @test y isa Array{Float32, 3}
- @test size(y) == (dim, len, batch_size)
-
-
println("tullio")
@btime $mha($x, v=:tullio);
@btime gradient(m -> sum(m($x, v=:tullio)), $mha);
@@ -172,4 +167,29 @@ function perf(dim, len, batch_size, num_heads)
return nothing
end

- perf(64, 100, 32, 8)
+ function test(dim, len, batch_size, num_heads)
+     mha = MultiHeadAttention(dim, num_heads)
+     x = rand(Float32, (dim, len, batch_size))
+     y = mha(x, v=:tullio)
+     @test y isa Array{Float32, 3}
+     @test size(y) == (dim, len, batch_size)
+     y2 = mha(x, v=:nnalib)
+     @test size(y) == size(y2)
+     @test y2 ≈ y
+
+     if CUDA.functional()
+         mha_gpu = mha |> gpu
+         x_gpu = x |> gpu
+
+         y_gpu = mha_gpu(x_gpu, v=:tullio)
+         y_gpu2 = mha_gpu(x_gpu, v=:nnalib)
+         @test Array(y_gpu) ≈ Array(y_gpu2)
+         @test Array(y_gpu) ≈ y
+     end
+     return nothing
+ end
+
+
+ test(12, 3, 2, 4)
+
+ perf(64, 100, 32, 4)
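
For context, a minimal sketch of the dispatch pattern this diff exercises: a callable layer whose forward pass picks a backend via the `v` keyword (`:tullio` vs `:nnalib`) and whose two paths can be checked against each other, the way the new `test` function compares the real backends. `TinyAttention` and its two mock paths are hypothetical stand-ins, not code from this commit.

using Test

# Hypothetical sketch (not part of the commit): a layer that selects an
# implementation via a `v` keyword, mocked with two equivalent products.
struct TinyAttention
    proj::Matrix{Float32}
end

TinyAttention(dim::Int) = TinyAttention(randn(Float32, dim, dim))

function (m::TinyAttention)(x::AbstractArray{Float32,3}; v::Symbol = :tullio)
    dim, len, batch = size(x)
    xr = reshape(x, dim, len * batch)   # flatten length and batch dims
    y = if v === :tullio
        m.proj * xr                     # stand-in for the Tullio kernel path
    elseif v === :nnalib
        # stand-in for the NeuralAttentionlib path: same product, other order
        permutedims(permutedims(xr) * permutedims(m.proj))
    else
        error("unknown backend: $v")
    end
    return reshape(y, dim, len, batch)
end

x = rand(Float32, 4, 3, 2)
m = TinyAttention(4)
@test m(x, v = :tullio) ≈ m(x, v = :nnalib)   # both paths agree, as in test()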