Fix bug in add_orblock! (forward elementbytes to getop) and add conditional-store tests.

chriselrod · chriselrod · commit e13c99089715 · 2020-02-10T22:46:18.000-05:00
diff --git a/src/LoopVectorization.jl b/src/LoopVectorization.jl
@@ -6,7 +6,8 @@ using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector
     maybestaticlength, maybestaticsize, staticm1, subsetview, vzero, stridedpointer_for_broadcast,
     Static, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange,
     PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct
-using SIMDPirates: VECTOR_SYMBOLS, evadd, evmul, vrange, reduced_add, reduced_prod, reduce_to_add, reduce_to_prod#,
+using SIMDPirates: VECTOR_SYMBOLS, evadd, evmul, vrange, reduced_add, reduced_prod, reduce_to_add, reduce_to_prod,
+    sizeequivalentfloat, sizeequivalentint
 #    vmullog2, vmullog10, vdivlog2, vdivlog2add, vdivlog10, vdivlog10add, vfmaddaddone
 using Base.Broadcast: Broadcasted, DefaultArrayStyle
 using LinearAlgebra: Adjoint, Transpose
diff --git a/src/add_ifelse.jl b/src/add_ifelse.jl
@@ -65,7 +65,7 @@ function add_orblock!(ls::LoopSet, condop::Operation, LHS, RHS::Expr, elementbyt
     add_orblock!(ls, condop, LHS, rhsop, elementbytes, position)
 end
 function add_orblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::Int, position::Int)
-    rhsop = getop(ls, RHS)
+    rhsop = getop(ls, RHS, elementbytes)  # resolve the untyped RHS through getop (elementbytes forwarded); the result is handed to the add_orblock! call on the next line
     add_orblock!(ls, condop, LHS, rhsop, elementbytes, position)
 end
 function add_orblock!(ls::LoopSet, condexpr::Expr, condeval::Expr, elementbytes::Int, position::Int)
diff --git a/src/lower_constant.jl b/src/lower_constant.jl
@@ -1,59 +1,22 @@
 
-@inline zerointeger(::Type{Float16}) = zero(Int16)
-@inline zerointeger(::Type{Float32}) = zero(Int32)
-@inline zerointeger(::Type{Float64}) = zero(Int64)
-@inline zerointeger(::Type{I}) where {I<:Integer} = zero(I)
-@inline zerofloat(::Type{Float16}) = zero(Float16)
-@inline zerofloat(::Type{Float32}) = zero(Float32)
-@inline zerofloat(::Type{Float64}) = zero(Float64)
-@inline zerofloat(::Type{UInt16}) = zero(Float16)
-@inline zerofloat(::Type{UInt32}) = zero(Float32)
-@inline zerofloat(::Type{UInt64}) = zero(Float64)
-@inline zerofloat(::Type{Int16}) = zero(Float16)
-@inline zerofloat(::Type{Int32}) = zero(Float32)
-@inline zerofloat(::Type{Int64}) = zero(Float64)
+@inline onefloat(::Type{T}) where {T} = one(sizeequivalentfloat(T))  # one(...) of the float type sizeequivalentfloat maps T to (presumably same bit width as T -- confirm in SIMDPirates)
+@inline oneinteger(::Type{T}) where {T} = one(sizeequivalentint(T))  # one(...) of the integer type sizeequivalentint maps T to (presumably same bit width as T -- confirm in SIMDPirates)
+@inline zerofloat(::Type{T}) where {T} = zero(sizeequivalentfloat(T))  # zero(...) counterpart of onefloat
+@inline zerointeger(::Type{T}) where {T} = zero(sizeequivalentint(T))  # zero(...) counterpart of oneinteger
 
 
-@inline oneinteger(::Type{Float16}) = one(Int16)
-@inline oneinteger(::Type{Float32}) = one(Int32)
-@inline oneinteger(::Type{Float64}) = one(Int64)
-@inline oneinteger(::Type{I}) where {I<:Integer} = one(I)
-@inline onefloat(::Type{Float16}) = one(Float16)
-@inline onefloat(::Type{Float32}) = one(Float32)
-@inline onefloat(::Type{Float64}) = one(Float64)
-@inline onefloat(::Type{UInt16}) = one(Float16)
-@inline onefloat(::Type{UInt32}) = one(Float32)
-@inline onefloat(::Type{UInt64}) = one(Float64)
-@inline onefloat(::Type{Int16}) = one(Float16)
-@inline onefloat(::Type{Int32}) = one(Float32)
-@inline onefloat(::Type{Int64}) = one(Float64)
-
-@inline equivalentint(::Type{I}) where {I<:Integer} = I
-@inline equivalentint(::Type{Float16}) = Int16
-@inline equivalentint(::Type{Float32}) = Int32
-@inline equivalentint(::Type{Float64}) = Int64
-@inline equivalentfloat(::Type{Float16}) = Float16
-@inline equivalentfloat(::Type{Float32}) = Float64
-@inline equivalentfloat(::Type{Float64}) = Float64
-@inline equivalentfloat(::Type{Int16}) = Float16
-@inline equivalentfloat(::Type{Int32}) = Float64
-@inline equivalentfloat(::Type{Int64}) = Float64
-@inline equivalentfloat(::Type{UInt16}) = Float16
-@inline equivalentfloat(::Type{UInt32}) = Float64
-@inline equivalentfloat(::Type{UInt64}) = Float64
-
 function lower_zero!(
     q::Expr, op::Operation, vectorized::Symbol, ls::LoopSet, unrolled::Symbol, U::Int, suffix::Union{Nothing,Int}, zerotyp::NumberType = zerotype(ls, op)
 )
     W = ls.W; typeT = ls.T
     mvar = variable_name(op, suffix)
     if zerotyp == HardInt
         newtypeT = gensym(:IntType)
-        pushpreamble!(ls, Expr(:(=), newtypeT, Expr(:call, lv(:equivalentint), typeT)))
+        pushpreamble!(ls, Expr(:(=), newtypeT, Expr(:call, lv(:sizeequivalentint), typeT)))
         typeT = newtypeT
     elseif zerotyp == HardFloat
         newtypeT = gensym(:FloatType)
-        pushpreamble!(ls, Expr(:(=), newtypeT, Expr(:call, lv(:equivalentfloat), typeT)))
+        pushpreamble!(ls, Expr(:(=), newtypeT, Expr(:call, lv(:sizeequivalentfloat), typeT)))
         typeT = newtypeT
     end
     if vectorized ∈ loopdependencies(op) || vectorized ∈ reducedchildren(op) || vectorized ∈ reduceddependencies(op)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1395,6 +1395,7 @@ end
         fill!(D2, -999999); D2 = @avx C .+ At' *ˡ B;
         @test D1 ≈ D2
         if T <: Union{Float32,Float64}
+            @show T, @__LINE__
             D3 = cos.(B');
             D4 = @avx cos.(B');
             @test D3 ≈ D4
@@ -1541,10 +1542,41 @@ end
             C[m,n] > 0 && (C[m,n] = Cₘₙ)
         end
     end
+    function condstore!(y, x)  # reference loop with no @avx/@_avx; its result is what the macro variants are compared against
+        @inbounds for i ∈ eachindex(y, x)
+            x1 = x[i]
+            x2 = x1*x1
+            x3 = x2 + x1
+            y[i] = x1  # unconditional default store
+            (x1 < 30) && (y[i] = x2)  # overwrite with x1*x1 when x1 < 30
+            (x1 < 80) || (y[i] = x3)  # overwrite with x1*x1 + x1 when !(x1 < 80)
+        end
+    end
+    function condstoreavx!(y, x)  # same loop body as condstore!, but passed through the @avx macro
+        @avx for i ∈ eachindex(y, x)
+            x1 = x[i]
+            x2 = x1*x1
+            x3 = x2 + x1
+            y[i] = x1  # unconditional default store
+            (x1 < 30) && (y[i] = x2)  # `&&` conditional store: overwrite with x1*x1 when x1 < 30
+            (x1 < 80) || (y[i] = x3)  # `||` conditional store: overwrite with x1*x1 + x1 when !(x1 < 80)
+        end
+    end
+    function condstore_avx!(y, x)  # same loop body as condstore!, but passed through the @_avx macro instead of @avx
+        @_avx for i ∈ eachindex(y, x)
+            x1 = x[i]
+            x2 = x1*x1
+            x3 = x2 + x1
+            y[i] = x1  # unconditional default store
+            (x1 < 30) && (y[i] = x2)  # `&&` conditional store: overwrite with x1*x1 when x1 < 30
+            (x1 < 80) || (y[i] = x3)  # `||` conditional store: overwrite with x1*x1 + x1 when !(x1 < 80)
+        end
+    end
 
 
     N = 117
     @time for T ∈ (Float32, Float64, Int32, Int64)
+        @show T, @__LINE__
         if T <: Integer
             a = rand(-T(100):T(100), N); b = rand(-T(100):T(100), N);
         else
@@ -1569,6 +1601,16 @@ end
         fill!(c2, -999999999); maybewriteoravx!(c2, a, b)
         @test c1 ≈ c2
 
+        if T <: Union{Float32,Float64}
+            a .*= 100;
+        end
+        b2 = similar(b);
+        condstore!(b, a)
+        condstoreavx!(b2, a)
+        @test b == b2
+        fill!(b2, -999999); condstore_avx!(b2, a)
+        @test b == b2
+
         M, K, N = 83, 85, 79;
         if T <: Integer
             A = rand(T(-100):T(100), K, M);