Skip to content

Commit 7a58714

Browse files
committed
Added a few more tests for integer/float mixing.
1 parent f965c5f commit 7a58714

File tree

3 files changed

+42
-6
lines changed

3 files changed

+42
-6
lines changed

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.3.0"
4+
version = "0.3.1"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -14,9 +14,9 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1414
[compat]
1515
MacroTools = "0.5"
1616
Parameters = "0.12.0"
17-
SIMDPirates = "0.1.1"
17+
SIMDPirates = "0.1.3"
1818
SLEEFPirates = "0.1.1"
19-
VectorizationBase = "0.1.4"
19+
VectorizationBase = "0.1.5"
2020
julia = "1.3.0"
2121

2222
[extras]

src/constructors.jl

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,39 @@ function LoopSet(q::Expr)
4848
ls
4949
end
5050

51+
"""
52+
@avx
53+
54+
Annotate a `for` loop, or a set of nested `for` loops whose bounds are constant across iterations, to optimize the computation. For example:
55+
56+
function AmulBavx!(C, A, B)
57+
@avx for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
58+
Cₘₙ = zero(eltype(C))
59+
for k ∈ 1:size(A,2)
60+
Cₘₙ += A[m,k] * B[k,n]
61+
end
62+
C[m,n] = Cₘₙ
63+
end
64+
end
65+
66+
The macro models the set of nested loops, and chooses an ordering of the loops to minimize predicted computation time.
67+
68+
It may also be applied to broadcasts:
69+
70+
```jldoctest
71+
julia> a = rand(100);
72+
73+
julia> b = @avx exp.(2 .* a);
74+
75+
julia> c = similar(b);
76+
77+
julia> @avx @. c = exp(2a);
78+
79+
julia> b ≈ c
80+
true
81+
```
82+
83+
"""
5184
macro avx(q)
5285
q2 = if q.head === :for
5386
lower(LoopSet(q))

test/runtests.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,12 @@ using LinearAlgebra
147147
return C
148148
end
149149

150-
for T ∈ (Float32, Float64)
150+
for T ∈ (Float32, Float64, Int32, Int64)
151151
M, K, N = 72, 75, 68;
152-
C = Matrix{T}(undef, M, N); A = randn(T, M, K); B = randn(T, K, N);
152+
TC = sizeof(T) == 4 ? Float32 : Float64
153+
R = T <: Integer ? (T(1):T(1000)) : T
154+
C = Matrix{TC}(undef, M, N);
155+
A = rand(R, M, K); B = rand(R, K, N);
153156
C2 = similar(C);
154157
AmulBavx!(C, A, B)
155158
AmulB!(C2, A, B)
@@ -159,7 +162,7 @@ using LinearAlgebra
159162
@test C ≈ C2
160163
fill!(C, 9999.999); mulCAtB_2x2block!(C, At, B);
161164
@test C ≈ C2
162-
Aₘ= rand(T, M, 2); Aₖ = rand(T, 2, K);
165+
Aₘ= rand(R, M, 2); Aₖ = rand(R, 2, K);
163166
rank2AmulBavx!(C, Aₘ, Aₖ, B)
164167
rank2AmulB!(C2, Aₘ, Aₖ, B)
165168
@test C ≈ C2

0 commit comments

Comments
 (0)