Update to VectorizationBase 0.14.

chriselrod · chriselrod · commit 806e50f71650 · 2020-12-16T10:54:16.000-05:00
diff --git a/Project.toml b/Project.toml
@@ -20,7 +20,7 @@ IfElse = "0.1"
 OffsetArrays = "1"
 SLEEFPirates = "0.6"
 UnPack = "1"
-VectorizationBase = "0.13.10"
+VectorizationBase = "0.14"
 julia = "1.5"
 
 [extras]
diff --git a/src/reconstruct_loopset.jl b/src/reconstruct_loopset.jl
@@ -394,17 +394,33 @@ function extract_external_functions!(ls::LoopSet, offset::Int)
 end
+"""
+    sizeofeltypes(v, num_arrays)::Int
+
+Return the largest effective element size, in bytes, among `typeeltype(v[i])` for
+`i ∈ 1:num_arrays`.
+
+When `VectorizationBase.SIMD_INTEGER_REGISTER_SIZE` differs from
+`VectorizationBase.REGISTER_SIZE`, the size of every `Integer` element type is scaled by
+`REGISTER_SIZE ÷ SIMD_INTEGER_REGISTER_SIZE` (presumably so that integer vectors fit the
+narrower integer registers -- TODO confirm).
+"""
 function sizeofeltypes(v, num_arrays)::Int
     T = typeeltype(v[1])
-    if !VectorizationBase.SIMD_NATIVE_INTEGERS && T <: Integer # hack
-        return VectorizationBase.REGISTER_SIZE
+    sz = if (VectorizationBase.SIMD_INTEGER_REGISTER_SIZE != VectorizationBase.REGISTER_SIZE) && T <: Integer # hack
+        (VectorizationBase.REGISTER_SIZE ÷ VectorizationBase.SIMD_INTEGER_REGISTER_SIZE) * sizeof(T)
+    else
+        sizeof(T)
     end
     for i ∈ 2:num_arrays
         Ttemp = typeeltype(v[i])
-        if !VectorizationBase.SIMD_NATIVE_INTEGERS && Ttemp <: Integer # hack
-            return VectorizationBase.REGISTER_SIZE
+        # Size each array by its own element type (`Ttemp`), not the first array's `T`.
+        szᵢ = if (VectorizationBase.SIMD_INTEGER_REGISTER_SIZE != VectorizationBase.REGISTER_SIZE) && Ttemp <: Integer # hack
+            (VectorizationBase.REGISTER_SIZE ÷ VectorizationBase.SIMD_INTEGER_REGISTER_SIZE) * sizeof(Ttemp)
+        else
+            sizeof(Ttemp)
         end
-        T = promote_type(T, Ttemp)
+        sz = max(szᵢ, sz)
     end
-    sizeof(T)
+    sz
 end
 
 function avx_loopset(instr, ops, arf, AM, LPSYM, LB, @nospecialize(vargs))
diff --git a/test/miscellaneous.jl b/test/miscellaneous.jl
@@ -1028,47 +1028,47 @@ end
         @test out1 == out2
     end
 
-function smoothdim!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)
-    ifirst, ilast = first(irng), last(irng)
-    ifirst > ilast && return s
-    # @inbounds @fastmath for Ipost in Rpost
-    for Ipost in Rpost
-        # Initialize the first value along the filtered dimension
-        for Ipre in Rpre
-            s[Ipre, ifirst, Ipost] = x[Ipre, ifirst, Ipost]
-        end
-        # Handle all other entries
-        for i = ifirst+1:ilast
+    function smoothdim!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)
+        ifirst, ilast = first(irng), last(irng)
+        ifirst > ilast && return s
+        # @inbounds @fastmath for Ipost in Rpost
+        for Ipost in Rpost
+            # Initialize the first value along the filtered dimension
             for Ipre in Rpre
-                s[Ipre, i, Ipost] = α*x[Ipre, i, Ipost] + (1-α)*x[Ipre, i-1, Ipost]
+                s[Ipre, ifirst, Ipost] = x[Ipre, ifirst, Ipost]
+            end
+            # Handle all other entries
+            for i = ifirst+1:ilast
+                for Ipre in Rpre
+                    s[Ipre, i, Ipost] = α*x[Ipre, i, Ipost] + (1-α)*x[Ipre, i-1, Ipost]
+                end
             end
         end
+        s
     end
-    s
-end
-function smoothdim_avx!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)
-    ifirst, ilast = first(irng), last(irng)
-    ifirst > ilast && return s
-    @avx for Ipost in Rpost
-        for Ipre in Rpre
-            s[Ipre, ifirst, Ipost] = x[Ipre, ifirst, Ipost]
-            for i = ifirst+1:ilast
-                s[Ipre, i, Ipost] = α*x[Ipre, i, Ipost] + (1-α)*x[Ipre, i-1, Ipost]
+    function smoothdim_avx!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)
+        ifirst, ilast = first(irng), last(irng)
+        ifirst > ilast && return s
+        @avx for Ipost in Rpost
+            for Ipre in Rpre
+                s[Ipre, ifirst, Ipost] = x[Ipre, ifirst, Ipost]
+                for i = ifirst+1:ilast
+                    s[Ipre, i, Ipost] = α*x[Ipre, i, Ipost] + (1-α)*x[Ipre, i-1, Ipost]
+                end
             end
         end
+        s
     end
-    s
-end
-function smoothdim_ifelse_avx!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)
-    ifirst, ilast = first(irng), last(irng)
-    ifirst > ilast && return s
-    @avx for Ipost in Rpost, i = ifirst:ilast, Ipre in Rpre
-        xi = x[Ipre, i, Ipost]
-        xim = i > ifirst ? x[Ipre, i-1, Ipost] : xi
-        s[Ipre, i, Ipost] = α*xi + (1-α)*xim
+    function smoothdim_ifelse_avx!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)
+        ifirst, ilast = first(irng), last(irng)
+        ifirst > ilast && return s
+        @avx for Ipost in Rpost, i = ifirst:ilast, Ipre in Rpre
+            xi = x[Ipre, i, Ipost]
+            xim = i > ifirst ? x[Ipre, i-1, Ipost] : xi
+            s[Ipre, i, Ipost] = α*xi + (1-α)*xim
+        end
+        s
     end
-    s
-end
 
     for T ∈ (Float32, Float64)
         @testset "Mixed CartesianIndex/Int indexing" begin
@@ -1103,33 +1103,33 @@ end
     end
 
 
-function mul1!(y::Vector{T}, A::Matrix{UInt8}, x::Vector{T}) where T 
-    packedstride = size(A, 1)
-    m, n = size(A)
-    @avx for j ∈ eachindex(x)
-        for i ∈ eachindex(y)
-            k = 2 * ((i-1) & 3)
-            block = A[(j-1) * packedstride + ((i-1) >> 2) + 1]
-            Aij = (block >> k) & 3
-            y[i] += (((Aij >= 2) + (Aij >= 3))) * x[j]
+    function mul1!(y::Vector{T}, A::Matrix{UInt8}, x::Vector{T}) where T 
+        packedstride = size(A, 1)
+        m, n = size(A)
+        @avx for j ∈ eachindex(x)
+            for i ∈ eachindex(y)
+                k = 2 * ((i-1) & 3)
+                block = A[(j-1) * packedstride + ((i-1) >> 2) + 1]
+                Aij = (block >> k) & 3
+                y[i] += (((Aij >= 2) + (Aij >= 3))) * x[j]
+            end
+        end
+        y
+    end
+    function mul2!(y::Vector{T}, A::Matrix{UInt8}, x::Vector{T}) where T 
+        packedstride = size(A, 1)
+        m, n = size(A)
+        for j ∈ eachindex(x)
+            for i ∈ eachindex(y)
+                k = 2 * ((i-1) & 3)
+                block = A[(j-1) * packedstride + ((i-1) >> 2) + 1]
+                Aij = (block >> k) & 3
+                y[i] += (((Aij >= 2) + (Aij >= 3))) * x[j]
+            end
         end
+        y
     end
-    y
-end
-function mul2!(y::Vector{T}, A::Matrix{UInt8}, x::Vector{T}) where T 
-    packedstride = size(A, 1)
-    m, n = size(A)
-    for j ∈ eachindex(x)
-        for i ∈ eachindex(y)
-            k = 2 * ((i-1) & 3)
-            block = A[(j-1) * packedstride + ((i-1) >> 2) + 1]
-            Aij = (block >> k) & 3
-            y[i] += (((Aij >= 2) + (Aij >= 3))) * x[j]
-        end
-    end
-    y
-end
-if Base.libllvm_version ≥ v"8" || LoopVectorization.VectorizationBase.SIMD_NATIVE_INTEGERS
+
     @testset "UInt8 mul" begin
         for n in 1:200
             v1 = rand(n); v3 =copy(v1);
@@ -1138,17 +1138,16 @@ if Base.libllvm_version ≥ v"8" || LoopVectorization.VectorizationBase.SIMD_NAT
             @test mul1!(v1, A, v2) ≈ mul2!(v3, A, v2)
         end
     end
-end
 
-@test_throws LoadError @macroexpand begin # pull #172
-    @avx for i in eachindex(xs)
-        if i in axes(ys,1)
-            xs[i] = ys[i]
-        else
-            xs[i] = zero(eltype(ys))
+    @test_throws LoadError @macroexpand begin # pull #172
+        @avx for i in eachindex(xs)
+            if i in axes(ys,1)
+                xs[i] = ys[i]
+            else
+                xs[i] = zero(eltype(ys))
+            end
         end
     end
-end
 
 end