Added a few more tests for integer/float mixing.

chriselrod · chriselrod · commit 7a58714611dc · 2020-01-07T07:02:49.000-05:00
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "LoopVectorization"
 uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
 authors = ["Chris Elrod <elrodc@gmail.com>"]
-version = "0.3.0"
+version = "0.3.1"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -14,9 +14,9 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
 [compat]
 MacroTools = "0.5"
 Parameters = "0.12.0"
-SIMDPirates = "0.1.1"
+SIMDPirates = "0.1.3"
 SLEEFPirates = "0.1.1"
-VectorizationBase = "0.1.4"
+VectorizationBase = "0.1.5"
 julia = "1.3.0"
 
 [extras]
diff --git a/src/constructors.jl b/src/constructors.jl
@@ -48,6 +48,39 @@ function LoopSet(q::Expr)
     ls
 end
 
+"""
+    @avx
+
+Annotate a `for` loop, or a set of nested `for` loops whose bounds are constant across iterations, to optimize the computation. For example:
+
+    function AmulBavx!(C, A, B)
+        @avx for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
+            Cₘₙ = zero(eltype(C))
+            for k ∈ 1:size(A,2)
+                Cₘₙ += A[m,k] * B[k,n]
+            end
+            C[m,n] = Cₘₙ
+        end
+    end
+
+The macro models the set of nested loops, and chooses an ordering of the loops to minimize the predicted computation time.
+
+It may also be applied to broadcasts:
+
+```jldoctest
+julia> a = rand(100);
+
+julia> b = @avx exp.(2 .* a);
+
+julia> c = similar(b);
+
+julia> @avx @. c = exp(2a);
+
+julia> b ≈ c 
+true
+```
+
+"""
 macro avx(q)
     q2 = if q.head === :for
         lower(LoopSet(q))
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -147,9 +147,12 @@ using LinearAlgebra
             return C
         end
         
-        for T ∈ (Float32, Float64)
+        for T ∈ (Float32, Float64, Int32, Int64)
             M, K, N = 72, 75, 68;
-            C = Matrix{T}(undef, M, N); A = randn(T, M, K); B = randn(T, K, N);
+            TC = sizeof(T) == 4 ? Float32 : Float64
+            R = T <: Integer ? (T(1):T(1000)) : T
+            C = Matrix{TC}(undef, M, N);
+            A = rand(R, M, K); B = rand(R, K, N);
             C2 = similar(C);
             AmulBavx!(C, A, B)
             AmulB!(C2, A, B)
@@ -159,7 +162,7 @@ using LinearAlgebra
             @test C ≈ C2
             fill!(C, 9999.999); mulCAtB_2x2block!(C, At, B);
             @test C ≈ C2
-            Aₘ= rand(T, M, 2); Aₖ = rand(T, 2, K);
+            Aₘ= rand(R, M, 2); Aₖ = rand(R, 2, K);
             rank2AmulBavx!(C, Aₘ, Aₖ, B)
             rank2AmulB!(C2, Aₘ, Aₖ, B)
             @test C ≈ C2