Merge branch 'master' of https://github.com/chriselrod/LoopVectorization.jl

chriselrod · chriselrod · commit e1b43eb47971 · 2020-05-11T10:25:16.000-04:00
diff --git a/src/LoopVectorization.jl b/src/LoopVectorization.jl
@@ -8,7 +8,7 @@ using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector
     AbstractColumnMajorStridedPointer, AbstractRowMajorStridedPointer, AbstractSparseStridedPointer, AbstractStaticStridedPointer,
     PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct,
     maybestaticfirst, maybestaticlast, scalar_less, scalar_greater
-using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange, reduced_add, reduced_prod, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum,
+using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange, reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum,
     sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!,
     vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, sizeequivalentfloat, sizeequivalentint, #prefetch,
     vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone
diff --git a/src/add_compute.jl b/src/add_compute.jl
@@ -161,19 +161,9 @@ function add_reduction_update_parent!(
         reductinit = add_constant!(ls, gensym(:reductzero), loopdependencies(parent), reductsym, elementbytes, :numericconstant)
         if reduct_zero === :zero
             push!(ls.preamble_zeros, (identifier(reductinit), IntOrFloat))
-        elseif reduct_zero === :one
-            push!(ls.preamble_ones, (identifier(reductinit), IntOrFloat))
         else
-            if reductzero === :true || reductzero === :false
-                pushpreamble!(ls, Expr(:(=), name(reductinit), reductzero))
-            else
-                pushpreamble!(ls, Expr(:(=), name(reductinit), Expr(:call, reductzero, ls.T)))
-            end
-            pushpreamble!(ls, op, name, reductinit)
+            push!(ls.preamble_funcofeltypes, (identifier(reductinit), reduct_zero))
         end
-        # if 
-            # reductcombine = reduction_combine_to(instrclass)
-        # end
     else
         reductinit = parent
         reductsym = var
@@ -328,7 +318,7 @@ function add_pow!(
     end
     if pint == 0
         op = Operation(length(operations(ls)), var, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS)
-        push!(ls.preamble_ones, (identifier(op),IntOrFloat))
+        push!(ls.preamble_funcofeltypes, (identifier(op),:one))
         return pushop!(ls, op)
     elseif pint == 1
         return add_compute!(ls, var, :identity, [xop], elementbytes)
diff --git a/src/add_constants.jl b/src/add_constants.jl
@@ -17,11 +17,6 @@ function add_constant!(ls::LoopSet, var::Number, elementbytes::Int = 8)
             (instruction(ops[id]) === LOOPCONSTANT && typ == typ_) && return ops[id]
         end
         push!(ls.preamble_zeros, (identifier(op),typ))
-    elseif isone(var)
-        for (id,typ_) ∈ ls.preamble_ones
-            (instruction(ops[id]) === LOOPCONSTANT && typ == typ_) && return ops[id]
-        end
-        push!(ls.preamble_ones, (identifier(op),typ))
     elseif var isa Integer
         for (id,ivar) ∈ ls.preamble_symint
             (instruction(ops[id]) === LOOPCONSTANT && ivar == var) && return ops[id]
diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl
@@ -150,7 +150,7 @@ function argmeta_and_consts_description(ls::LoopSet, arraysymbolinds)
         Expr(:curly, :Tuple, ls.preamble_symint...),
         Expr(:curly, :Tuple, ls.preamble_symfloat...),
         Expr(:curly, :Tuple, ls.preamble_zeros...),
-        Expr(:curly, :Tuple, ls.preamble_ones...)
+        Expr(:curly, :Tuple, ls.preamble_funcofeltypes...)
     )
 end
 
diff --git a/src/costs.jl b/src/costs.jl
@@ -146,6 +146,8 @@ const COST = Dict{Symbol,InstructionCost}(
     :vprod => InstructionCost(6,2.0),
     :reduced_add => InstructionCost(4,0.5),# ignoring reduction part of cost, might be nop
     :reduced_prod => InstructionCost(4,0.5),# ignoring reduction part of cost, might be nop
+    :reduced_max => InstructionCost(4,0.5),# ignoring reduction part of cost, might be nop
+    :reduced_min => InstructionCost(4,0.5),# ignoring reduction part of cost, might be nop
     :reduce_to_add => InstructionCost(0,0.0,0.0,0),
     :reduce_to_prod => InstructionCost(0,0.0,0.0,0),
     :abs => InstructionCost(1, 0.5),
diff --git a/src/graphs.jl b/src/graphs.jl
@@ -172,7 +172,7 @@ struct LoopSet
     preamble_symint::Vector{Tuple{Int,Int}}
     preamble_symfloat::Vector{Tuple{Int,Float64}}
     preamble_zeros::Vector{Tuple{Int,NumberType}}
-    preamble_ones::Vector{Tuple{Int,NumberType}}
+    preamble_funcofeltypes::Vector{Tuple{Int,Symbol}}
     includedarrays::Vector{Symbol}
     includedactualarrays::Vector{Symbol}
     syms_aliasing_refs::Vector{Symbol}
@@ -221,7 +221,7 @@ function pushpreamble!(ls::LoopSet, op::Operation, v::Number)
     if iszero(v)
         push!(ls.preamble_zeros, (id, typ))
     elseif isone(v)
-        push!(ls.preamble_ones, (id, typ))
+        push!(ls.preamble_funcofeltypes, (id, :one))
     elseif v isa Integer
         push!(ls.preamble_symint, (id, convert(Int,v)))
     else
@@ -234,7 +234,7 @@ function pushpreamble!(ls::LoopSet, op::Operation, RHS::Expr)
     if RHS.head === :call && first(RHS.args) === :zero
         push!(ls.preamble_zeros, (identifier(op), IntOrFloat))
     elseif RHS.head === :call && first(RHS.args) === :one
-        push!(ls.preamble_ones, (identifier(op), IntOrFloat))
+        push!(ls.preamble_funcofeltypes, (identifier(op), :one))
     else
         pushpreamble!(ls, Expr(:(=), c, RHS))
         pushpreamble!(ls, op, c)
@@ -248,20 +248,6 @@ function zerotype(ls::LoopSet, op::Operation)
     end
     INVALID
 end
-# function Base.iszero(ls::LoopSet, op::Operation)
-#     opid = identifier(op)
-#     for (id,_) ∈ ls.preamble_zeros
-#         opid == id && return true
-#     end
-#     false
-# end
-# function Base.isone(ls::LoopSet, op::Operation)
-#     opid = identifier(op)
-#     for (id,_) ∈ ls.preamble_ones
-#         opid == id && return true
-#     end
-#     false
-# end
 
 includesarray(ls::LoopSet, array::Symbol) = array ∈ ls.includedarrays
 
@@ -497,7 +483,11 @@ function add_operation!(
         elseif f === :zero || f === :one
             c = gensym(f)
             op = add_constant!(ls, c, ls.loopsymbols[1:position], LHS, elementbytes, :numericconstant)
-            push!(f === :zero ? ls.preamble_zeros : ls.preamble_ones, (identifier(op), IntOrFloat))
+            if f === :zero
+                push!(ls.preamble_zeros, (identifier(op), IntOrFloat))
+            else
+                push!(ls.preamble_funcofeltypes, (identifier(op), :one))
+            end
             op
         else
             add_compute!(ls, LHS, RHS, elementbytes, position)
@@ -525,7 +515,11 @@ function add_operation!(
         elseif f === :zero || f === :one
             c = gensym(f)
             op = add_constant!(ls, c, ls.loopsymbols[1:position], LHS_sym, elementbytes, :numericconstant)
-            push!(f === :zero ? ls.preamble_zeros : ls.preamble_ones, (identifier(op), IntOrFloat))
+            if f === :zero
+                push!(ls.preamble_zeros, (identifier(op), IntOrFloat))
+            else
+                push!(ls.preamble_funcofeltypes, (identifier(op), :one))
+            end
             op
         else
             add_compute!(ls, LHS_sym, RHS, elementbytes, position, LHS_ref)
diff --git a/src/lower_constant.jl b/src/lower_constant.jl
@@ -1,6 +1,6 @@
 
-@inline onefloat(::Type{T}) where {T} = one(sizeequivalentfloat(T))
-@inline oneinteger(::Type{T}) where {T} = one(sizeequivalentint(T))
+# @inline onefloat(::Type{T}) where {T} = one(sizeequivalentfloat(T))
+# @inline oneinteger(::Type{T}) where {T} = one(sizeequivalentint(T))
 @inline zerofloat(::Type{T}) where {T} = zero(sizeequivalentfloat(T))
 @inline zerointeger(::Type{T}) where {T} = zero(sizeequivalentint(T))
 
@@ -64,6 +64,12 @@ function lower_constant!(
                 Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:addscalar)), Expr(:call, lv(:vzero), VECTORWIDTHSYMBOL, ELTYPESYMBOL), constsym)
             elseif instrclass == MULTIPLICATIVE_IN_REDUCTIONS
                 Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:mulscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :one, ELTYPESYMBOL)), constsym)
+            elseif instrclass == MAX
+                Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:maxscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemin, ELTYPESYMBOL)), constsym)
+                
+            elseif instrclass == MIN
+                Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:minscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemax, ELTYPESYMBOL)), constsym)
+                
             else
                 throw("Reductions of type $(reduction_zero(reinstrclass)) not yet supported; please file an issue as a reminder to take care of this.")
             end
@@ -132,14 +138,8 @@ function lower_licm_constants!(ls::LoopSet)
             setconstantop!(ls, ops[id], Expr(:call, lv(:zerofloat), ELTYPESYMBOL))
         end
     end
-    for (id,typ) ∈ ls.preamble_ones
-        if typ == IntOrFloat
-            setop!(ls, ops[id], Expr(:call, :one, ELTYPESYMBOL))
-        elseif typ == HardInt
-            setop!(ls, ops[id], Expr(:call, lv(:oneinteger), ELTYPESYMBOL))
-        else#if typ == HardFloat
-            setop!(ls, ops[id], Expr(:call, lv(:onefloat), ELTYPESYMBOL))
-        end
+    for (id,f) ∈ ls.preamble_funcofeltypes
+        setop!(ls, ops[id], Expr(:call, f, ELTYPESYMBOL))
     end
 end
 
diff --git a/src/reconstruct_loopset.jl b/src/reconstruct_loopset.jl
@@ -203,7 +203,7 @@ function process_metadata!(ls::LoopSet, AM, num_arrays::Int)
     expandbyoffset!(ls.preamble_symint, AM[4].parameters, opoffsets)
     expandbyoffset!(ls.preamble_symfloat, AM[5].parameters, opoffsets)
     expandbyoffset!(ls.preamble_zeros, AM[6].parameters, opoffsets)
-    expandbyoffset!(ls.preamble_ones, AM[7].parameters, opoffsets)
+    expandbyoffset!(ls.preamble_funcofeltypes, AM[7].parameters, opoffsets)
     nothing
 end
 function expandbyoffset!(indexpand::Vector{T}, inds, offsets::Vector{Int}, expand::Bool = true) where {T <: Union{Int,Tuple{Int,<:Any}}}
diff --git a/src/split_loops.jl b/src/split_loops.jl
@@ -43,7 +43,7 @@ function split_loopset(ls::LoopSet, ids)
     append_if_included!(ls_new.preamble_symint, ls.preamble_symint, included)
     append_if_included!(ls_new.preamble_symfloat, ls.preamble_symfloat, included)
     append_if_included!(ls_new.preamble_zeros, ls.preamble_zeros, included)
-    append_if_included!(ls_new.preamble_ones, ls.preamble_ones, included)
+    append_if_included!(ls_new.preamble_funcofeltypes, ls.preamble_funcofeltypes, included)
     ls_new
 end
 
diff --git a/test/mapreduce.jl b/test/mapreduce.jl
@@ -1,4 +1,5 @@
 
+
 @testset "mapreduce" begin
     function maximum_avx(x)
         s = typemin(eltype(x))
@@ -8,23 +9,34 @@
         s
     end
     for T ∈ (Int32, Int64, Float32, Float64)
+        @show T, @__LINE__
         if T <: Integer
             R = T(1):T(100)
             x7 = rand(R, 7); y7 = rand(R, 7);
             x = rand(R, 127); y = rand(R, 127);
         else
             x7 = rand(T, 7); y7 = rand(T, 7);
             x = rand(T, 127); y = rand(T, 127);
-            @test vmapreduce(hypot, +, x, y) ≈ mapreduce(hypot, +, x, y)
-            @test vmapreduce(^, (a,b) -> a + b, x7, y7) ≈ mapreduce(^, (a,b) -> a + b, x7, y7)
+            if VERSION ≥ v"1.4"
+                @test vmapreduce(hypot, +, x, y) ≈ mapreduce(hypot, +, x, y)
+                @test vmapreduce(^, (a,b) -> a + b, x7, y7) ≈ mapreduce(^, +, x7, y7)
+            else
+                @test vmapreduce(hypot, +, x, y) ≈ sum(hypot.(x, y))
+                @test vmapreduce(^, (a,b) -> a + b, x7, y7) ≈ sum(x7 .^ y7)
+            end
         end
         @test vreduce(+, x7) ≈ sum(x7)
         @test vreduce(+, x) ≈ sum(x)
         @test_throws AssertionError vmapreduce(hypot, +, x7, x)
-        @test vmapreduce(a -> 2a, *, x) ≈ mapreduce(a -> 2a, *, x)
-        @test vmapreduce(sin, +, x7) ≈ mapreduce(sin, +, x7)
-        @test vmapreduce(log, +, x) ≈ mapreduce(log, +, x)
-        @test vmapreduce(abs2, +, x) ≈ mapreduce(abs2, +, x)
+        if VERSION ≥ v"1.4"
+            @test vmapreduce(a -> 2a, *, x) ≈ mapreduce(a -> 2a, *, x)
+            @test vmapreduce(sin, +, x7) ≈ mapreduce(sin, +, x7)
+        else
+            @test vmapreduce(a -> 2a, *, x) ≈ prod(2 .* x)
+            @test vmapreduce(sin, +, x7) ≈ sum(sin.(x7))
+        end
+        @test vmapreduce(log, +, x) ≈ sum(log, x)
+        @test vmapreduce(abs2, +, x) ≈ sum(abs2, x)
         @test maximum(x) == vreduce(max, x) == maximum_avx(x)
     end
 
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -45,7 +45,7 @@ Base.IndexStyle(::Type{<:FallbackArrayWrapper}) = IndexLinear()
 
     @time include("filter.jl")
     
-    VERSION ≥ v"1.3" && @time include("mapreduce.jl")
+    @time include("mapreduce.jl")
 
     @time include("ifelsemasks.jl")
 

Original file line number	Diff line number	Diff line change
`@@ -150,7 +150,7 @@ function argmeta_and_consts_description(ls::LoopSet, arraysymbolinds)`
`150`	`150`	`Expr(:curly, :Tuple, ls.preamble_symint...),`
`151`	`151`	`Expr(:curly, :Tuple, ls.preamble_symfloat...),`
`152`	`152`	`Expr(:curly, :Tuple, ls.preamble_zeros...),`
`153`		`- Expr(:curly, :Tuple, ls.preamble_ones...)`
	`153`	`+ Expr(:curly, :Tuple, ls.preamble_funcofeltypes...)`
`154`	`154`	`)`
`155`	`155`	`end`
`156`	`156`