Skip to content

Commit e390c17

Browse files
committed
Need to use opoffsets for symlicms.
1 parent 8771d75 commit e390c17

File tree

6 files changed

+30
-12
lines changed

6 files changed

+30
-12
lines changed

Manifest.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,19 @@ deps = ["Base64"]
3232
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
3333

3434
[[OffsetArrays]]
35-
git-tree-sha1 = "6a35d9446b40ae5004cd7bd0f1ae3505528c7fd6"
35+
git-tree-sha1 = "930db8ef90483570107f2396b1ffc6680f08e8b7"
3636
uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
37-
version = "1.0.3"
37+
version = "1.0.4"
3838

3939
[[Random]]
4040
deps = ["Serialization"]
4141
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
4242

4343
[[SIMDPirates]]
4444
deps = ["VectorizationBase"]
45-
git-tree-sha1 = "8f89aa38f5e4e89f2a474ffdc850fc21d6ab9ed4"
45+
git-tree-sha1 = "53c43af0172c24b0783bd93650bd8b78afb3e57b"
4646
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
47-
version = "0.7.4"
47+
version = "0.7.5"
4848

4949
[[SLEEFPirates]]
5050
deps = ["Libdl", "SIMDPirates", "VectorizationBase"]

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.6.24"
4+
version = "0.6.25"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -13,7 +13,7 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1313

1414
[compat]
1515
OffsetArrays = "1"
16-
SIMDPirates = "0.7.4"
16+
SIMDPirates = "0.7.5"
1717
SLEEFPirates = "0.4"
1818
UnPack = "0"
1919
VectorizationBase = "0.9.5"

src/determinestrategy.jl

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,19 @@ function tile_cost(X, U, T, UL, TL)
245245
# X[1]*Tfactor*Ufactor + X[4] + X[2] * Tfactor + X[3] * Ufactor
246246
X[1] + X[4] + X[2] * Tfactor + X[3] * Ufactor
247247
end
248+
# function itertilesize(X, UL, TL)
249+
# cb = Inf
250+
# Ub = 1; Tb = 1
251+
# for U ∈ 1:4, T ∈ 1:4
252+
# c = tile_cost(X, U, T, UL, TL)
253+
# @show U, T, c
254+
# if cb > c
255+
# cb = c
256+
# Ub = U; Tb = T
257+
# end
258+
# end
259+
# Ub, Tb, cb
260+
# end
248261
function solve_tilesize(X, R, UL, TL)
249262
# @inbounds any(iszero, (R[1],R[2],R[3])) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
250263
first(iszero(R)) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
@@ -253,14 +266,17 @@ function solve_tilesize(X, R, UL, TL)
253266
# first solving for U via quadratic formula
254267
# X is vector of costs, and R is of register pressures
255268
RR = REGISTER_COUNT - R[3] - R[4] # RR ≡ RemainingRegisters
269+
R[1] + R[2] > 0.5RR && return 1,1, tile_cost(X, 1, 1, UL, TL)
256270
a = (R[1])^2*X[2] - (R[2])^2*R[1]*X[3]/RR
257271
b = 2*R[1]*R[2]*X[3]
258272
c = -RR*R[1]*X[3]
259-
Ufloat = (sqrt(b^2 - 4a*c) - b) / (2a)
260-
Tfloat = (RR - Ufloat*R[2])/(Ufloat*R[1])
261-
# @show Ufloat, Tfloat
273+
discriminant = b^2 - 4a*c
274+
discriminant < 0 && return -1,-1,Inf
275+
Ufloat = (sqrt(discriminant) - b) / (2a)
276+
Tfloat = (RR - max(1.0,Ufloat)*R[2])/(max(1.0,Ufloat)*R[1])
262277
if !(isfinite(Tfloat) && isfinite(Ufloat))
263278
return 4, 4, tile_cost(X, 4, 4, UL, TL)
279+
# return itertilesize(X, UL, TL)
264280
end
265281
Ulow = max(1, floor(Int, Ufloat)) # must be at least 1
266282
Tlow = max(1, floor(Int, Tfloat)) # must be at least 1

src/graphs.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ function add_operation!(
469469
elseif RHS.head === :if
470470
add_if!(ls, LHS, RHS, elementbytes, position)
471471
else
472-
throw("Expression not recognized:\n$x")
472+
throw("Expression not recognized:\n$RHS")
473473
end
474474
end
475475
# Method without an explicit LHS argument: a fresh symbol is created with
# `gensym(:LHS)` and the call is forwarded to the method that accepts an LHS.
function add_operation!(ls::LoopSet, RHS::Expr, elementbytes::Int, position::Int)
    lhs = gensym(:LHS)
    return add_operation!(ls, lhs, RHS, elementbytes, position)
end

src/lower_compute.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ function lower_compute!(
2828
end
2929
parentsunrolled = isunrolled_sym.(parents_op, unrolled, tiled)
3030
if instr.instr === :identity && name(first(parents_op)) === var && isone(length(parents_op))
31-
if (opunrolled == first(parentsunrolled)) && ((!isnothing(suffix)) == first(parentstiled))
31+
if (opunrolled == first(parentsunrolled)) && ((!isnothing(suffix)) == parentstiled[1])
3232
return
3333
end
3434
end
@@ -58,6 +58,7 @@ function lower_compute!(
5858
for u ∈ 0:U-1
5959
push!(q.args, Expr(:(=), Symbol(newparentname, u), Symbol(parentname, u)))
6060
end
61+
@show parentop
6162
reduce_expr!(q, newparentname, Instruction(reduction_to_single_vector(instruction(newparentop))), U)
6263
push!(q.args, Expr(:(=), newparentname, Symbol(newparentname, 0)))
6364
end

src/reconstruct_loopset.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ function process_metadata!(ls::LoopSet, AM, num_arrays::Int)
169169
for (i,si) ∈ enumerate(AM[3].parameters)
170170
sii = si::Int
171171
s = gensym(:symlicm)
172-
push!(ls.preamble_symsym, (si, s))
172+
push!(ls.preamble_symsym, (opoffsets[sii] + 1, s))
173173
pushpreamble!(ls, Expr(:(=), s, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,Symbol(@__FILE__)), Expr(:ref, :vargs, num_arrays + i))))
174174
end
175175
expandbyoffset!(ls.preamble_symint, AM[4].parameters, opoffsets)
@@ -343,6 +343,7 @@ end
343343
# elbytes(::VectorizationBase.AbstractPointer{T}) where {T} = sizeof(T)::Int
344344
# Returns the element type parameter `T` of a
# `VectorizationBase.AbstractPointer{T}` subtype, given the type itself.
function typeeltype(::Type{P}) where {T,P<:VectorizationBase.AbstractPointer{T}}
    return T
end
345345
# Returns the element type parameter `T` of any `AbstractRange{T}` subtype,
# given the range type itself (not an instance).
function typeeltype(::Type{<:AbstractRange{T}}) where {T}
    return T
end
346+
# typeeltype(::Any) = Int8
346347

347348
function add_array_symbols!(ls::LoopSet, arraysymbolinds::Vector{Symbol}, offset::Int)
348349
for (i,as) ∈ enumerate(arraysymbolinds)

0 commit comments

Comments
 (0)