JuliaSIMD
diff --git a/‎Project.toml
Lines changed: 5 additions & 5 deletions b/‎Project.toml
Lines changed: 5 additions & 5 deletions
diff --git a/‎docs/src/devdocs/loopset_structure.md
Lines changed: 1 addition & 1 deletion b/‎docs/src/devdocs/loopset_structure.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/src/devdocs/lowering.md
Lines changed: 1 addition & 1 deletion b/‎docs/src/devdocs/lowering.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/LoopVectorization.jl
Lines changed: 22 additions & 19 deletions b/‎src/LoopVectorization.jl
Lines changed: 22 additions & 19 deletions
diff --git a/‎src/condense_loopset.jl
Lines changed: 3 additions & 3 deletions b/‎src/condense_loopset.jl
Lines changed: 3 additions & 3 deletions
@@ -5,21 +5,21 @@ version = "0.8.26"
 
 [deps]
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
-SIMDPirates = "21efa798-c60a-11e8-04d3-e1a92915a26a"
 SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
 UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
 VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
 
 [compat]
 DocStringExtensions = "0.8"
+IfElse = "0"
 OffsetArrays = "1"
-SIMDPirates = "0.8.25"
-SLEEFPirates = "0.5.4"
+SLEEFPirates = "0.6"
 UnPack = "0,1"
-VectorizationBase = "0.12.31"
-julia = "1.1"
+VectorizationBase = "0.13"
+julia = "1.3"
 
 [extras]
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
@@ -26,7 +26,7 @@ julia> LoopVectorization.operations(lsAmulB)
  var"##reduction#260" = LoopVectorization.vfmadd_fast(var"##tempload#258", var"##tempload#259", var"##reduction#260")
  var"##RHS#256" = LoopVectorization.reduce_to_add(var"##reduction#260", var"##RHS#256")
 ```
-The act of performing a "reduction" across a loop introduces a few extra operations that manage creating a "zero" with respect to the reduction, and then combining with the specified value using `reduce_to_add`, which performs any necessary type conversions, such as from an `SVec` vector-type to a scalar, if necessary. This simplifies code generation, by making the functions agnostic with respect to the actual vectorization decisions the library makes.
+The act of performing a "reduction" across a loop introduces a few extra operations that manage creating a "zero" with respect to the reduction, and then combining with the specified value using `reduce_to_add`, which performs any necessary type conversions, such as from a `Vec` vector-type to a scalar. This simplifies code generation by making the functions agnostic with respect to the actual vectorization decisions the library makes.
 
 Each operation is listed as depending on a set of loop iteration symbols:
 ```julia
 
@@ -5,6 +5,6 @@ This task is made simpler via multiple dispatch making the lowering of the compo
 ```julia
 vload(vptr_A, (i,j,k))
 ```
-with the behavior of this load determined by the types of the arguments. Vectorization is expressed by making an index a `_MM{W}` type, rather than an integer, and operations with it will either produce another `_MM{W}` when it will still correspond to contiguous loads, or an `SVec{W,<:Integer}` if the resulting loads will be discontiguous, so that a `gather` or `scatter!` will be used. If all indexes are simply integers, then this produces a scalar load or store.
+with the behavior of this load determined by the types of the arguments. Vectorization is expressed by making an index a `_MM{W}` type, rather than an integer, and operations with it will either produce another `_MM{W}` when it will still correspond to contiguous loads, or a `Vec{W,<:Integer}` if the resulting loads will be discontiguous, so that a `gather` or `scatter!` will be used. If all indexes are simply integers, then this produces a scalar load or store.
 
 
@@ -1,22 +1,25 @@
 module LoopVectorization
 
-if (!isnothing(get(ENV, "TRAVIS_BRANCH", nothing)) || !isnothing(get(ENV, "APPVEYOR", nothing))) && isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optlevel"))
-    @eval Base.Experimental.@optlevel 1
-end
+# if (!isnothing(get(ENV, "TRAVIS_BRANCH", nothing)) || !isnothing(get(ENV, "APPVEYOR", nothing))) && isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optlevel"))
+    # @eval Base.Experimental.@optlevel 1
+# end
 
-using VectorizationBase, SIMDPirates, SLEEFPirates, UnPack, OffsetArrays
-using VectorizationBase: REGISTER_SIZE, extract_data, num_vector_load_expr,
-    mask, masktable, pick_vector_width_val, valmul, valrem, valmuladd, valmulsub, valadd, valsub, _MM,
-    maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, subsetview, vzero, stridedpointer_for_broadcast,
-    Static, Zero, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange, unwrap, maybestaticrange,
-    AbstractColumnMajorStridedPointer, AbstractRowMajorStridedPointer, AbstractSparseStridedPointer, AbstractStaticStridedPointer,
-    PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct, offsetprecalc,
-    maybestaticfirst, maybestaticlast, scalar_less, scalar_greater, noalias!, gesp, gepbyte, pointerforcomparison, NativeTypes, staticmul, staticmuladd
-using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange, 
-    reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum,
-    sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!,
-    vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, sizeequivalentfloat, sizeequivalentint, #prefetch,
-    vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone, vadd1, relu
+using VectorizationBase, SLEEFPirates, UnPack, OffsetArrays
+using VectorizationBase: REGISTER_SIZE, data,
+    mask, pick_vector_width_val, MM,
+    maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, vzero,
+    Zero, maybestaticrange, offsetprecalc,
+    maybestaticfirst, maybestaticlast, scalar_less, gesp, pointerforcomparison, NativeTypes, staticmul,
+    relu
+using IfElse: ifelse
+
+const Static = StaticInt
+# missing: subsetview, stridedpointer_for_broadcast, unwrap, StaticUnitRange, stridedpointers, noalias!, gepbyte, 
+# using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange, 
+#     reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum,
+#     sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!,
+#     vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, sizeequivalentfloat, sizeequivalentint, #prefetch,
+#     vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone, vadd1, relu
 using SLEEFPirates: pow
 using Base.Broadcast: Broadcasted, DefaultArrayStyle
 using LinearAlgebra: Adjoint, Transpose
@@ -46,7 +49,7 @@ If you want good performance, DO NOT use a 32-bit build of Julia if you don't ha
 const REGISTER_COUNT = Sys.ARCH === :i686 ? 8 : VectorizationBase.REGISTER_COUNT
 
 include("getconstindexes.jl")
-include("vectorizationbase_extensions.jl")
+# include("vectorizationbase_extensions.jl")
 include("predicates.jl")
 include("map.jl")
 include("filter.jl")
@@ -89,7 +92,7 @@ loop-reordering so as to improve performance:
 """
 LoopVectorization
 
-include("precompile.jl")
-_precompile_()
+# include("precompile.jl")
+# _precompile_()
 
 end # module
@@ -161,9 +161,9 @@ function loopset_return_value(ls::LoopSet, ::Val{extract}) where {extract}
         op = getop(ls, ls.outer_reductions[1])
         if extract
             if (isu₁unrolled(op) | isu₂unrolled(op))
-                Expr(:call, :extract_data, Symbol(mangledvar(op), 0))
+                Expr(:call, :data, Symbol(mangledvar(op), 0))
             else
-                Expr(:call, :extract_data, mangledvar(op))
+                Expr(:call, :data, mangledvar(op))
             end
         else
             Symbol(mangledvar(op), 0)
@@ -174,7 +174,7 @@ function loopset_return_value(ls::LoopSet, ::Val{extract}) where {extract}
         for or ∈ ls.outer_reductions
             op = ops[or]
             if extract
-                push!(ret.args, Expr(:call, :extract_data, Symbol(mangledvar(op), 0)))
+                push!(ret.args, Expr(:call, :data, Symbol(mangledvar(op), 0)))
             else
                 push!(ret.args, Symbol(mangledvar(ops[or]), 0))
             end