Skip to content

Commit 96aef96

Browse files
committed
clamp unroll extrema and add div to cost table (copying fdiv for now...)
1 parent 8b5f640 commit 96aef96

File tree

3 files changed

+8
-7
lines changed

3 files changed

+8
-7
lines changed

src/codegen/operation_evaluation_order.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ function fillorder!(ls::LoopSet, order::Vector{Symbol}, u₁loop::Symbol, u₂lo
105105
ro[_n] = loopsym = order[n]
106106
#loopsym = order[n]
107107
for op ∈ ops
108-
addoptoorder!( ls, included_vars, place_after_loop, op, loopsym, _n, u₁loop, u₂loop, vectorized, u₂max )
108+
addoptoorder!( ls, included_vars, place_after_loop, op, loopsym, _n, u₁loop, u₂loop, vectorized, u₂max )
109109
end
110110
end
111111
end

src/modeling/costs.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ const COST = Dict{Symbol,InstructionCost}(
140140
:rem_fast => InstructionCost(13,4.0,-2.0), # FIXME
141141
:div_fast => InstructionCost(13,4.0,-2.0),
142142
:vdiv_fast => InstructionCost(20,4.0,-2.0), # FIXME
143+
:÷ => InstructionCost(13,4.0,-2.0),
143144
# :evadd => InstructionCost(4,0.5),
144145
# :evsub => InstructionCost(4,0.5),
145146
# :evmul => InstructionCost(4,0.5),
@@ -494,6 +495,8 @@ const FUNCTIONSYMBOLS = IdDict{Type{<:Function},Instruction}(
494495
# typeof(VectorizationBase.vfdiv) => :(/),
495496
# typeof(VectorizationBase.vfdiv!) => :(/),
496497
typeof(VectorizationBase.vdiv) => :(/),
498+
typeof(÷) => :(÷),
499+
typeof(Base.FastMath.div_fast) => :div_fast,
497500
typeof(Base.FastMath.div_fast) => :div_fast,
498501
typeof(Base.FastMath.rem_fast) => :rem_fast,
499502
typeof(==) => :(==),

src/modeling/determinestrategy.jl

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -469,19 +469,17 @@ function solve_unroll_lagrange(X, R, u₁L, u₂L, u₁step::Int, u₂step::Int,
469469
u₂low = max(u₂step, floor(Int, 0.8u₂float)) # must be at least 1
470470
u₁high = solve_unroll_constT(R, u₂low) + u₁step
471471
u₂high = solve_unroll_constU(R, u₁low) + u₂step
472-
# @show u₁low, u₁high, u₂low, u₂high
473472
if u₁low ≥ u₁high
474473
u₁low = solve_unroll_constT(R, u₂high)
475474
end
476475
if u₂low ≥ u₂high
477476
u₂low = solve_unroll_constU(R, u₁high)
478477
end
479478
maxunroll = atleast32registers ? (((X₂ > 0) & (X₃ > 0)) ? 10 : 8) : 6
480-
u₁low = (min(u₁low, maxunroll) ÷ u₁step) * u₁step
481-
u₂low = (min(u₂low, maxunroll) ÷ u₂step) * u₂step
482-
u₁high = min(u₁high, maxunroll)
483-
u₂high = min(u₂high, maxunroll)
484-
# @show u₁low, u₁high, u₂low, u₂high, u₁float, u₂float
479+
u₁low = (clamp(u₁low, 1, maxunroll) ÷ u₁step) * u₁step
480+
u₂low = (clamp(u₂low, 1, maxunroll) ÷ u₂step) * u₂step
481+
u₁high = clamp(u₁high, 1, maxunroll)
482+
u₂high = clamp(u₂high, 1, maxunroll)
485483
solve_unroll_iter(X, R, u₁L, u₂L, reverse(u₁low:u₁step:u₁high), reverse(u₂low:u₂step:u₂high))
486484
end
487485

0 commit comments

Comments
 (0)