Skip to content

Commit 2307d26

Browse files
committed
Don't throw an error if a tiling order is degenerate, just reject that order.
1 parent 2572896 commit 2307d26

File tree

3 files changed

+55
-13
lines changed

3 files changed

+55
-13
lines changed

Manifest.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,15 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
6161

6262
[[SIMDPirates]]
6363
deps = ["MacroTools", "VectorizationBase"]
64-
git-tree-sha1 = "72c002943060fe3518b77faf671a986652ca8f44"
64+
git-tree-sha1 = "3e45c76dfcc349ff208a955e1ce6e92b1be6d15e"
6565
repo-rev = "master"
6666
repo-url = "https://github.com/chriselrod/SIMDPirates.jl"
6767
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
6868
version = "0.1.0"
6969

7070
[[SLEEFPirates]]
7171
deps = ["SIMDPirates", "VectorizationBase"]
72-
git-tree-sha1 = "42cbc7f06b1f2063fc08b2aa2f8cd2e70d1e91bc"
72+
git-tree-sha1 = "ba032bbcc7038853867119f4cac383a0051b62a8"
7373
repo-rev = "master"
7474
repo-url = "https://github.com/chriselrod/SLEEFPirates.jl"
7575
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
@@ -87,7 +87,7 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
8787

8888
[[VectorizationBase]]
8989
deps = ["CpuId", "LinearAlgebra"]
90-
git-tree-sha1 = "913138bbc3e1892fbdd379fa48cafffe46a445c2"
90+
git-tree-sha1 = "1cc48a9bce5c18f2f70fa16cc5b2b39b39332a9e"
9191
repo-rev = "master"
9292
repo-url = "https://github.com/chriselrod/VectorizationBase.jl"
9393
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"

src/determinestrategy.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,8 @@ function tile_cost(X, U, T)
150150
X[1] + X[4] + X[2] / T + X[3] / U
151151
end
152152
function solve_tilesize(X, R)
153-
first(R) == 0 && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
153+
@inbounds any(iszero, (R[1],R[2],R[3])) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
154+
# @inbounds any(iszero, (R[1],R[2],R[3])) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
154155
# We use lagrange multiplier to finding floating point values for U and T
155156
# first solving for U via quadratic formula
156157
# X is vector of costs, and R is of register pressures
@@ -163,6 +164,7 @@ function solve_tilesize(X, R)
163164
Ufloat = (sqrt(b^2 - 4a*c) - b) / (2a)
164165
Tfloat = (RR - Ufloat*R[2])/(Ufloat*R[1])
165166
# @show Ufloat, Tfloat
167+
(isfinite(Tfloat) && isfinite(Ufloat)) || return -1,-1,Inf
166168
Ulow = max(1, floor(Int, Ufloat)) # must be at least 1
167169
Tlow = max(1, floor(Int, Tfloat)) # must be at least 1
168170
Uhigh = Ulow + 1 #ceil(Int, Ufloat)

test/runtests.jl

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,9 @@ function mygemmavx!(C, A, B)
7575
C[i,j] = Cᵢⱼ
7676
end
7777
end
78-
C = Matrix{Float64}(undef, 100, 100); A = randn(100, 100); B = randn(100, 100);
78+
79+
M, K, N = rand(70:81, 3);
80+
C = Matrix{Float64}(undef, M, N); A = randn(M, K); B = randn(K, N);
7981
C2 = similar(C);
8082
mygemmavx!(C, A, B)
8183
mygemm!(C2, A, B)
@@ -97,7 +99,7 @@ LoopVectorization.choose_unroll_order(lsgemm)
9799
ops = LoopVectorization.oporder(lsgemm);
98100
findall(length.(ops) .!= 0)
99101

100-
dotq = :(for i ∈ eachindex(a)
102+
dotq = :(for i ∈ eachindex(a,b)
101103
s += a[i]*b[i]
102104
end)
103105
lsdot = LoopVectorization.LoopSet(dotq);
@@ -106,17 +108,15 @@ LoopVectorization.lower(lsdot)
106108
lsdot.operations
107109

108110
function mydot(a, b)
109-
@assert length(a) == length(b) "Both arrays must be of equal length."
110111
s = 0.0
111-
@inbounds @simd for i ∈ eachindex(a)
112+
@inbounds @simd for i ∈ eachindex(a,b)
112113
s += a[i]*b[i]
113114
end
114115
s
115116
end
116117
function mydotavx(a, b)
117-
@assert length(a) == length(b) "Both arrays must be of equal length."
118118
s = 0.0
119-
@avx for i ∈ eachindex(a)
119+
@avx for i ∈ eachindex(a,b)
120120
s += a[i]*b[i]
121121
end
122122
s
@@ -153,13 +153,12 @@ function myselfdotavx(a)
153153
s
154154
end
155155

156-
a = rand(400); b = rand(400);
156+
a = rand(400);
157157
@test myselfdotavx(a) ≈ myselfdot(a)
158158

159159
@benchmark myselfdotavx($a)
160160
@benchmark myselfdot($a)
161161

162-
b = rand(43);
163162
@benchmark myselfdotavx($b)
164163
@benchmark myselfdot($b)
165164

@@ -192,6 +191,9 @@ b2'
192191
all(b1 .≈ b2)
193192
@test all(b1 .≈ b2)
194193

194+
@benchmark myvexp!($b1, $a)
195+
@benchmark myvexpavx!($b2, $a)
196+
195197

196198
vexpsq = :(for i ∈ eachindex(a)
197199
s += exp(a[i])
@@ -218,7 +220,8 @@ end
218220

219221
@test myvexp(a) ≈ myvexpavx(a)
220222

221-
223+
@benchmark myvexp($a)
224+
@benchmark myvexpavx($a)
222225

223226
gemvq = :(for i ∈ eachindex(y)
224227
yᵢ = 0.0
@@ -258,6 +261,43 @@ mygemvavx!(y2, A, x)
258261

259262
@test all(y1 .≈ y2)
260263

264+
@benchmark mygemv!($y1, $A, $x)
265+
@benchmark mygemvavx!($y2, $A, $x)
266+
267+
subcolq = :(for i ∈ 1:size(A,2), j ∈ eachindex(x)
268+
B[j,i] = A[j,i] - x[j]
269+
end)
270+
lssubcol = LoopVectorization.LoopSet(subcolq);
271+
@test LoopVectorization.choose_order(lssubcol) == (Symbol[:j,:i], 4, -1)
272+
LoopVectorization.lower(lssubcol)
273+
274+
function mysubcol!(B, A, x)
275+
@inbounds for i ∈ 1:size(A,2)
276+
@simd for j ∈ eachindex(x)
277+
B[j,i] = A[j,i] - x[j]
278+
end
279+
end
280+
end
281+
function mysubcolavx!(B, A, x)
282+
@avx for i ∈ 1:size(A,2), j ∈ eachindex(x)
283+
B[j,i] = A[j,i] - x[j]
284+
end
285+
end
286+
A = randn(199, 498); x = randn(size(A,1));
287+
B1 = similar(A); B2 = similar(A);
288+
289+
mysubcol!(B1, A, x)
290+
mysubcolavx!(B2, A, x)
291+
292+
@test all(B1 .≈ B2)
293+
294+
@benchmark mysubcol!($B1, $A, $x)
295+
@benchmark mysubcolavx!($B2, $A, $x)
296+
297+
@code_native debuginfo=:none mysubcol!(B1, A, x)
298+
@code_native debuginfo=:none mysubcolavx!(B2, A, x)
299+
300+
261301
lsgemv.preamble
262302
LoopVectorization.lower(lsgemv)
263303
LoopVectorization.lower_unrolled(lsgemv, 4);

0 commit comments

Comments
 (0)