Added missing sizeequivalent methods and fixed a bug in the copying code. Fixes #48.

chriselrod · chriselrod · commit 150d3371f88f · 2020-02-08T13:01:42.000-05:00
diff --git a/src/add_stores.jl b/src/add_stores.jl
@@ -21,6 +21,7 @@ function add_copystore!(
     ls::LoopSet, parent::Operation, mpref::ArrayReferenceMetaPosition, elementbytes::Int
 )
     op = add_compute!(ls, gensym(), :identity, [parent], elementbytes)
+    pushfirst!(mpref.parents, parent)
     add_store!(ls, name(op), mpref, elementbytes, op)
 end
 
@@ -42,8 +43,6 @@ function add_store!(
             if ref == opp.ref.ref
                 id = opp.identifier
                 break
-            # else
-                # @show ref opp.ref.ref
             end
         end
         add_pvar = false
diff --git a/src/graphs.jl b/src/graphs.jl
@@ -466,7 +466,8 @@ function add_operation!(
         array, rawindices = ref_from_expr(RHS)
         RHS_ref = array_reference_meta!(ls, array, rawindices, elementbytes)
         op = add_load!(ls, gensym(LHS_sym), RHS_ref, elementbytes)
-        add_compute!(ls, LHS_sym, :identity, [op], elementbytes)
+        iop = add_compute!(ls, LHS_sym, :identity, [op], elementbytes)
+        pushfirst!(LHS_ref.parents, iop)
     elseif RHS.head === :call
         f = first(RHS.args)
         if f === :getindex
diff --git a/src/lowering.jl b/src/lowering.jl
@@ -360,21 +360,21 @@ end
 @inline sizeequivalentfloat(::Type{Float16}, x::Float64) = Float16(x)
 @inline sizeequivalentfloat(::Type{Float16}, x::Float32) = Float16(x)
 @inline sizeequivalentint(::Type{T}, x::T) where {T} = x
-@inline sizeequivalentint(::Type{Int64}, x::Int64) = x
 @inline sizeequivalentint(::Type{Int64}, x::Int32) = Int64(x)
 @inline sizeequivalentint(::Type{Int64}, x::Int16) = Int64(x)
 @inline sizeequivalentint(::Type{Int32}, x::Int64) = Int32(x)
-@inline sizeequivalentint(::Type{Int32}, x::Int32) = x
 @inline sizeequivalentint(::Type{Int32}, x::Int16) = Int32(x)
 @inline sizeequivalentint(::Type{Int16}, x::Int64) = Int16(x)
 @inline sizeequivalentint(::Type{Int16}, x::Int32) = Int16(x)
-@inline sizeequivalentint(::Type{Int16}, x::Int16) = x
+@inline sizeequivalentint(::Type{Float64}, x::Int64) = x
 @inline sizeequivalentint(::Type{Float64}, x::Int32) = Int64(x)
 @inline sizeequivalentint(::Type{Float64}, x::Int16) = Int64(x)
 @inline sizeequivalentint(::Type{Float32}, x::Int64) = Int32(x)
+@inline sizeequivalentint(::Type{Float32}, x::Int32) = x
 @inline sizeequivalentint(::Type{Float32}, x::Int16) = Int32(x)
 @inline sizeequivalentint(::Type{Float16}, x::Int64) = Int16(x)
 @inline sizeequivalentint(::Type{Float16}, x::Int32) = Int16(x)
+@inline sizeequivalentint(::Type{Float16}, x::Int16) = x
 
 
 function setup_preamble!(ls::LoopSet, W::Symbol, typeT::Symbol, vectorized::Symbol, unrolled::Symbol, tiled::Symbol, U::Int)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -957,49 +957,6 @@ end
             softmax3_core_avx4!(lse, qq, xx, tmpmax, maxk, nk)
         end
 
-        function copyavx1!(x, y)
-            @avx for i ∈ eachindex(x)
-                x[i] = y[i]
-            end
-        end
-        function copy_avx1!(x, y)
-            @_avx for i ∈ eachindex(x)
-                x[i] = y[i]
-            end
-        end
-        function copyavx2!(x, y)
-            @avx for i ∈ eachindex(x)
-                yᵢ = y[i]
-                x[i] = yᵢ
-            end
-        end
-        function copy_avx2!(x, y)
-            @_avx for i ∈ eachindex(x)
-                yᵢ = y[i]
-                x[i] = yᵢ
-            end
-        end
-        function make2point3avx!(x)
-            @avx for i ∈ eachindex(x)
-                x[i] = 2.3
-            end
-        end
-        function make2point3_avx!(x)
-            @_avx for i ∈ eachindex(x)
-                x[i] = 2.3
-            end
-        end
-        function myfillavx!(x, a)
-            @avx for i ∈ eachindex(x)
-                x[i] = a
-            end
-        end
-        function myfill_avx!(x, a)
-            @_avx for i ∈ eachindex(x)
-                x[i] = a
-            end
-        end
-
         function mysumavx(x)
             s = zero(eltype(x))
             @avx for i ∈ eachindex(x)
@@ -1145,41 +1102,6 @@ end
             @test q1 ≈ q2
             @test sum(q2; dims=3) ≈ ones(T,ni,nj)
 
-            fill!(q2, NaN); copyavx1!(q2, x)
-            @test x == q2
-            fill!(q2, NaN); copy_avx1!(q2, x)
-            @test x == q2
-            fill!(q2, NaN); copyavx2!(q2, x)
-            @test x == q2
-            fill!(q2, NaN); copy_avx2!(q2, x)
-            @test x == q2
-            fill!(q2, NaN); @avx q2 .= x;
-            @test x == q2
-
-            myfillavx!(x, -9829732.153);
-            fill!(q2, -9829732.153);
-            @test x == q2
-            myfill_avx!(x, 9732.153);
-            fill!(q2, 9732.153);
-            @test x == q2
-            myfill_avx!(x, 5);
-            fill!(q2, 5)
-            @test x == q2
-            myfillavx!(x, 5345);
-            fill!(q2, 5345)
-            @test x == q2
-            make2point3avx!(x)
-            fill!(q2, 2.3)
-            @test x == q2
-            fill!(x, NaN); make2point3_avx!(x)
-            @test x == q2        
-            @avx x .= 34;
-            fill!(q2, 34)
-            @test x == q2
-            @avx x .= 34.242;
-            fill!(q2, 34.242)
-            @test x == q2
-
             s = sum(x)
             @test s ≈ mysumavx(x)
             @test s ≈ mysum_avx(x)
@@ -1199,6 +1121,153 @@ end
         end
 end
 
+@time @testset "copy" begin
+
+    function copyavx1!(x, y)
+        @avx for i ∈ eachindex(x)
+            x[i] = y[i]
+        end
+    end
+    function copy_avx1!(x, y)
+        @_avx for i ∈ eachindex(x)
+            x[i] = y[i]
+        end
+    end
+    function copyavx2!(x, y)
+        @avx for i ∈ eachindex(x)
+            yᵢ = y[i]
+            x[i] = yᵢ
+        end
+    end
+    function copy_avx2!(x, y)
+        @_avx for i ∈ eachindex(x)
+            yᵢ = y[i]
+            x[i] = yᵢ
+        end
+    end
+    function offset_copy!(A, B)
+        @inbounds for i=1:size(A,1), j=1:size(B,2)
+	    A[i,j+2] = B[i,j]
+        end
+    end
+    function offset_copyavx1!(A, B)
+        @avx for i=1:size(A,1), j=1:size(B,2)
+	    A[i,j+2] = B[i,j]
+        end
+    end
+    function offset_copy_avx1!(A, B)
+        @_avx for i=1:size(A,1), j=1:size(B,2)
+	    A[i,j+2] = B[i,j]
+        end
+    end
+    function offset_copyavx2!(A, B)
+        @avx for i=1:size(A,1), j=1:size(B,2)
+            Bᵢⱼ = B[i,j]
+	    A[i,j+2] = Bᵢⱼ
+        end
+    end
+    function offset_copy_avx2!(A, B)
+        @_avx for i=1:size(A,1), j=1:size(B,2)
+            Bᵢⱼ = B[i,j]
+	    A[i,j+2] = Bᵢⱼ
+        end
+    end
+    function make2point3avx!(x)
+        @avx for i ∈ eachindex(x)
+            x[i] = 2.3
+        end
+    end
+    function make2point3_avx!(x)
+        @_avx for i ∈ eachindex(x)
+            x[i] = 2.3
+        end
+    end
+    function make23avx!(x)
+        @avx for i ∈ eachindex(x)
+            x[i] = 23
+        end
+    end
+    function make23_avx!(x)
+        @_avx for i ∈ eachindex(x)
+            x[i] = 23
+        end
+    end
+    function myfillavx!(x, a)
+        @avx for i ∈ eachindex(x)
+            x[i] = a
+        end
+    end
+    function myfill_avx!(x, a)
+        @_avx for i ∈ eachindex(x)
+            x[i] = a
+        end
+    end
+
+    for T ∈ (Float32, Float64, Int32, Int64)
+        @show T, @__LINE__
+        R = T <: Integer ? (-T(100):T(100)) : T 
+        x = rand(R, 237);
+        q1 = similar(x); q2 = similar(x);
+        
+        fill!(q2, -999999); copyavx1!(q2, x);
+        @test x == q2
+        fill!(q2, -999999); copy_avx1!(q2, x);
+        @test x == q2
+        fill!(q2, -999999); copyavx2!(q2, x);
+        @test x == q2
+        fill!(q2, -999999); copy_avx2!(q2, x);
+        @test x == q2
+        fill!(q2, -999999); @avx q2 .= x;
+        @test x == q2
+
+        B = rand(R, 79, 83);
+        A1 = zeros(T, 79, 85);
+        A2 = zeros(T, 79, 85);
+        offset_copy!(A1, B);
+        fill!(A2, 0); offset_copyavx1!(A2, B);
+        @test A1 == A2
+        fill!(A2, 0); offset_copyavx2!(A2, B);
+        @test A1 == A2
+        fill!(A2, 0); offset_copy_avx1!(A2, B);
+        @test A1 == A2
+        fill!(A2, 0); offset_copy_avx2!(A2, B);
+        @test A1 == A2
+        
+        a = rand(R)
+        myfillavx!(x, a);
+        fill!(q2, a);
+        @test x == q2
+        a = rand(R)
+        myfill_avx!(x, a);
+        fill!(q2, a);
+        @test x == q2
+        a = rand(R)
+        myfill_avx!(x, a);
+        fill!(q2, a);
+        @test x == q2
+        a = rand(R)
+        myfillavx!(x, a);
+        fill!(q2, a);
+        @test x == q2
+        if T <: Union{Float32,Float64}
+            make2point3avx!(x)
+            fill!(q2, 2.3)
+            @test x == q2
+            fill!(x, -999999); make2point3_avx!(x)
+            @test x == q2
+        end
+        a = rand(R)
+        @avx x .= a;
+        fill!(q2, a);
+        @test x == q2
+        a = rand(R)
+        @avx x .= a;
+        fill!(q2, a);
+        @test x == q2
+        
+    end
+end
+
 @time @testset "broadcast" begin
     M, N = 37, 47
     # M = 77;