Skip to content

Commit 87db87b

Browse files
committed
needless microopt
1 parent b7c6c02 commit 87db87b

File tree

1 file changed

+30
-16
lines changed

1 file changed

+30
-16
lines changed

src/modeling/determinestrategy.jl

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,6 +1501,7 @@ struct LoopOrders
15011501
syms_nr::Vector{Symbol}
15021502
syms_r::Vector{Symbol}
15031503
buff::Vector{Symbol}
1504+
state::Vector{Int}
15041505
end
15051506

15061507
function outer_reduct_loopordersplit(ls::LoopSet)
@@ -1549,18 +1550,20 @@ function LoopOrders(ls::LoopSet)
15491550
LoopOrders(
15501551
nonreductsyms,
15511552
reductsyms,
1552-
Vector{Symbol}(undef, length(ls.loopsymbols))
1553+
Vector{Symbol}(undef, length(ls.loopsymbols)),
1554+
Vector{Int}(undef, length(ls.loopsymbols))
15531555
)
15541556
end
15551557

1556-
nonreductview(lo::LoopOrders) = view(lo.buff, 1:length(lo.syms_nr))
1557-
reductview(lo::LoopOrders) = view(lo.buff, 1+length(lo.syms_nr):length(lo.buff))
1558+
nonreductview(lo::LoopOrders) = @inbounds view(lo.buff, 1:length(lo.syms_nr))
1559+
reductview(lo::LoopOrders) = @inbounds view(lo.buff, 1+length(lo.syms_nr):length(lo.buff))
15581560
function Base.iterate(lo::LoopOrders)
1559-
copyto!(nonreductview(lo), lo.syms_nr)
1560-
copyto!(reductview(lo), lo.syms_r)
1561+
_copyto!(nonreductview(lo), lo.syms_nr)
1562+
_copyto!(reductview(lo), lo.syms_r)
15611563
nr = length(lo.syms_nr)
15621564
r = length(lo.syms_r)
1563-
state = zeros(Int, nr + r)
1565+
state = lo.state
1566+
_fill!(state,0)
15641567
lo.buff, (view(state, 1:nr), view(state, 1+nr:nr+r))
15651568
end
15661569

@@ -1582,10 +1585,20 @@ function advance_state!(state)
15821585
end
15831586
true
15841587
end
1585-
function advance_state!(state, Nr)
1588+
"""
    _copyto!(x, y)

Copy each element of `y` into the same index of `x` using a plain loop, and
return `x` (the same convention as `Base.copyto!`, which this helper stands in for).

The loop indices come from `eachindex(x, y)`, which itself verifies that both
arrays share the same indices; that is what makes skipping the per-element
bounds checks with `@inbounds` safe. As a consequence, and unlike `Base.copyto!`,
a destination whose indices differ from the source's is rejected rather than
partially overwritten.
"""
function _copyto!(x, y)
    @inbounds for i in eachindex(x, y)
        x[i] = y[i]
    end
    return x
end
1593+
"""
    _fill!(x, y)

Store `y` at every index of `x` using a plain loop, and return `x` (the same
convention as `Base.fill!`, which this helper stands in for).

The indices are taken from `eachindex(x)`, so every access is in range and the
bounds checks can be skipped with `@inbounds`.
"""
function _fill!(x, y)
    @inbounds for i in eachindex(x)
        x[i] = y
    end
    return x
end
1598+
function advance_state!(state, Nr)::Bool
15861599
state_nr = view(state, 1:Nr)
15871600
advance_state!(state_nr) && return true
1588-
fill!(state_nr, 0)
1601+
_fill!(state_nr, 0)
15891602
advance_state!(view(state, 1+Nr:length(state)))
15901603
end
15911604
# Exchange the elements of `x` stored at positions `i` and `j` in place.
# Both elements are read before either is written; the value of the call is the
# pair of elements as they were before the exchange, `(old x[i], old x[j])`.
function swap!(x::AbstractVector, i::Int, j::Int)
    at_i = x[i]
    at_j = x[j]
    x[j] = at_i
    x[i] = at_j
    return (at_i, at_j)
end
@@ -1594,20 +1607,21 @@ function swap!(
15941607
src::AbstractVector{Symbol},
15951608
offs::AbstractVector{Int}
15961609
)
1597-
copyto!(dest, src)
1610+
_copyto!(dest, src)
15981611
for i ∈ eachindex(offs)
15991612
sᵢ = offs[i]
16001613
sᵢ == 0 || swap!(dest, i, i + sᵢ)
16011614
end
16021615
end
16031616
# This is not a good algorithm
1604-
function Base.iterate(lo::LoopOrders, (state_nr, state_r))
1617+
@inline function Base.iterate(lo::LoopOrders, states)
1618+
(state_nr, state_r) = states
16051619
if advance_state!(state_nr)
16061620
swap!(nonreductview(lo), lo.syms_nr, state_nr)
16071621
else
16081622
advance_state!(state_r) || return nothing
1609-
fill!(state_nr, 0)
1610-
copyto!(nonreductview(lo), lo.syms_nr)
1623+
_fill!(state_nr, 0)
1624+
_copyto!(nonreductview(lo), lo.syms_nr)
16111625
swap!(reductview(lo), lo.syms_r, state_r)
16121626
end
16131627
lo.buff, (state_nr, state_r)
@@ -1644,7 +1658,7 @@ function choose_unroll_order(
16441658
cost_temp = evaluate_cost_unroll(ls, new_order, new_vec, lowest_cost, sld)
16451659
if cost_temp < lowest_cost
16461660
lowest_cost = cost_temp
1647-
copyto!(best_order, new_order)
1661+
_copyto!(best_order, new_order)
16481662
best_vec = new_vec
16491663
end
16501664
end
@@ -1769,7 +1783,7 @@ function choose_tile(
17691783
bestu₂ = newu₂
17701784
bestu₁ = newu₁
17711785
loadelim = loadelim_temp
1772-
copyto!(best_order, new_order)
1786+
_copyto!(best_order, new_order)
17731787
save_tilecost!(ls)
17741788
end
17751789
end
@@ -1822,7 +1836,7 @@ function choose_order_cost(ls::LoopSet, v::Int = 0)
18221836
mismatched = mismatchedstorereductions(ls)
18231837
if num_loops(ls) > 1 && tc ≤ uc
18241838
@assert ls.loop_order.bestorder === torder
1825-
# copyto!(ls.loop_order.bestorder, torder)
1839+
# _copyto!(ls.loop_order.bestorder, torder)
18261840
return torder,
18271841
tunroll,
18281842
ttile,
@@ -1833,7 +1847,7 @@ function choose_order_cost(ls::LoopSet, v::Int = 0)
18331847
shouldinline
18341848
# return torder, tvec, 4, 4#5, 5
18351849
else
1836-
copyto!(ls.loop_order.bestorder, uorder)
1850+
_copyto!(ls.loop_order.bestorder, uorder)
18371851
UF, uunroll = determine_unroll_factor(ls, uorder, uvec)
18381852
return uorder,
18391853
uunroll,

0 commit comments

Comments
 (0)