Don't throw an error if a tiling order is degenerate; just reject that order.

chriselrod · chriselrod · commit 2307d26fc7ea · 2019-12-19T03:28:06.000-05:00
diff --git a/Manifest.toml b/Manifest.toml
@@ -61,15 +61,15 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [[SIMDPirates]]
 deps = ["MacroTools", "VectorizationBase"]
-git-tree-sha1 = "72c002943060fe3518b77faf671a986652ca8f44"
+git-tree-sha1 = "3e45c76dfcc349ff208a955e1ce6e92b1be6d15e"
 repo-rev = "master"
 repo-url = "https://github.com/chriselrod/SIMDPirates.jl"
 uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
 version = "0.1.0"
 
 [[SLEEFPirates]]
 deps = ["SIMDPirates", "VectorizationBase"]
-git-tree-sha1 = "42cbc7f06b1f2063fc08b2aa2f8cd2e70d1e91bc"
+git-tree-sha1 = "ba032bbcc7038853867119f4cac383a0051b62a8"
 repo-rev = "master"
 repo-url = "https://github.com/chriselrod/SLEEFPirates.jl"
 uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
@@ -87,7 +87,7 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [[VectorizationBase]]
 deps = ["CpuId", "LinearAlgebra"]
-git-tree-sha1 = "913138bbc3e1892fbdd379fa48cafffe46a445c2"
+git-tree-sha1 = "1cc48a9bce5c18f2f70fa16cc5b2b39b39332a9e"
 repo-rev = "master"
 repo-url = "https://github.com/chriselrod/VectorizationBase.jl"
 uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
diff --git a/src/determinestrategy.jl b/src/determinestrategy.jl
@@ -150,7 +150,8 @@ function tile_cost(X, U, T)
     X[1] + X[4] + X[2] / T + X[3] / U
 end
 function solve_tilesize(X, R)
-    first(R) == 0 && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
+    @inbounds any(iszero, (R[1],R[2],R[3])) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
+    # @inbounds any(iszero, (R[1],R[2],R[3])) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
     # We use a Lagrange multiplier to find floating-point values for U and T,
     # first solving for U via the quadratic formula.
     # X is a vector of costs, and R is a vector of register pressures.
@@ -163,6 +164,7 @@ function solve_tilesize(X, R)
     Ufloat = (sqrt(b^2 - 4a*c) - b) / (2a)
     Tfloat = (RR - Ufloat*R[2])/(Ufloat*R[1])
     # @show Ufloat, Tfloat
+    (isfinite(Tfloat) && isfinite(Ufloat)) || return -1,-1,Inf
     Ulow = max(1, floor(Int, Ufloat)) # must be at least 1
     Tlow = max(1, floor(Int, Tfloat)) # must be at least 1
     Uhigh = Ulow + 1 #ceil(Int, Ufloat)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -75,7 +75,9 @@ function mygemmavx!(C, A, B)
         C[i,j] = Cᵢⱼ
     end
 end
-C = Matrix{Float64}(undef, 100, 100); A = randn(100, 100); B = randn(100, 100);
+
+M, K, N = rand(70:81, 3);
+C = Matrix{Float64}(undef, M, N); A = randn(M, K); B = randn(K, N);
 C2 = similar(C);
 mygemmavx!(C, A, B)
 mygemm!(C2, A, B)
@@ -97,7 +99,7 @@ LoopVectorization.choose_unroll_order(lsgemm)
 ops = LoopVectorization.oporder(lsgemm);
 findall(length.(ops) .!= 0)
 
-dotq = :(for i ∈ eachindex(a)
+dotq = :(for i ∈ eachindex(a,b)
          s += a[i]*b[i]
          end)
 lsdot = LoopVectorization.LoopSet(dotq);
@@ -106,17 +108,15 @@ LoopVectorization.lower(lsdot)
 lsdot.operations
 
 function mydot(a, b)
-    @assert length(a) == length(b) "Both arrays must be of equal length."
     s = 0.0
-    @inbounds @simd for i ∈ eachindex(a)
+    @inbounds @simd for i ∈ eachindex(a,b)
         s += a[i]*b[i]
     end
     s
 end
 function mydotavx(a, b)
-    @assert length(a) == length(b) "Both arrays must be of equal length."
     s = 0.0
-    @avx for i ∈ eachindex(a)
+    @avx for i ∈ eachindex(a,b)
         s += a[i]*b[i]
     end
     s
@@ -153,13 +153,12 @@ function myselfdotavx(a)
     s
 end
 
-a = rand(400); b = rand(400);
+a = rand(400);
 @test myselfdotavx(a) ≈ myselfdot(a)
 
 @benchmark myselfdotavx($a)
 @benchmark myselfdot($a)
 
-b = rand(43);
 @benchmark myselfdotavx($b)
 @benchmark myselfdot($b)
 
@@ -192,6 +191,9 @@ b2'
 all(b1 .≈ b2)
 @test all(b1 .≈ b2)
 
+@benchmark myvexp!($b1, $a)
+@benchmark myvexpavx!($b2, $a)
+
 
 vexpsq = :(for i ∈ eachindex(a)
           s += exp(a[i])
@@ -218,7 +220,8 @@ end
 
 @test myvexp(a) ≈ myvexpavx(a)
 
-
+@benchmark myvexp($a)
+@benchmark myvexpavx($a)
 
 gemvq = :(for i ∈ eachindex(y)
           yᵢ = 0.0
@@ -258,6 +261,43 @@ mygemvavx!(y2, A, x)
 
 @test all(y1 .≈ y2)
 
+@benchmark mygemv!($y1, $A, $x)
+@benchmark mygemvavx!($y2, $A, $x)
+
+subcolq = :(for i ∈ 1:size(A,2), j ∈ eachindex(x)
+            B[j,i] = A[j,i] - x[j]
+            end)
+lssubcol = LoopVectorization.LoopSet(subcolq);
+@test LoopVectorization.choose_order(lssubcol) == (Symbol[:j,:i], 4, -1)
+LoopVectorization.lower(lssubcol)
+
+function mysubcol!(B, A, x)
+    @inbounds for i ∈ 1:size(A,2)
+        @simd for j ∈ eachindex(x)
+            B[j,i] = A[j,i] - x[j]
+        end
+    end
+end
+function mysubcolavx!(B, A, x)
+    @avx for i ∈ 1:size(A,2), j ∈ eachindex(x)
+        B[j,i] = A[j,i] - x[j]
+    end
+end
+A = randn(199, 498); x = randn(size(A,1));
+B1 = similar(A); B2 = similar(A);
+
+mysubcol!(B1, A, x)
+mysubcolavx!(B2, A, x)
+
+@test all(B1 .≈ B2)
+
+@benchmark mysubcol!($B1, $A, $x)
+@benchmark mysubcolavx!($B2, $A, $x)
+
+@code_native debuginfo=:none mysubcol!(B1, A, x)
+@code_native debuginfo=:none mysubcolavx!(B2, A, x)
+
+
 lsgemv.preamble
 LoopVectorization.lower(lsgemv)
 LoopVectorization.lower_unrolled(lsgemv, 4);