Skip to content

Commit 30923eb

Browse files
committed
Fix problem in cost evaluation.
1 parent 93c4d7f commit 30923eb

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

src/determinestrategy.jl

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,7 @@ function evaluate_cost_tile(
684684
reg_pressure = reg_pres_buf(ls)
685685
# @inbounds reg_pressure[2] = 1
686686
# @inbounds reg_pressure[3] = 1
687-
iter::Int = 1
687+
iter::Float64 = 1.0
688688
u₁reached = u₂reached = false
689689
choose_to_inline = Ref(false)
690690
copyto!(names(ls), order); reverse!(names(ls))
@@ -716,8 +716,10 @@ function evaluate_cost_tile(
716716
depends_on_u₁ = isu₁unrolled(op)
717717
depends_on_u₂ = isu₂unrolled(op)
718718
# cost is reduced by unrolling u₁ if it is interior to u₁loop (true if either u₁reached, or if depends on u₂ [or u₁]) and doesn't depend on u₁
719-
reduced_by_unrolling[1,id] = (u₁reached | depends_on_u₂) & !depends_on_u₁
720-
reduced_by_unrolling[2,id] = (u₂reached | depends_on_u₁) & !depends_on_u₂
719+
# reduced_by_unrolling[1,id] = (u₁reached | depends_on_u₂) & !depends_on_u₁
720+
# reduced_by_unrolling[2,id] = (u₂reached | depends_on_u₁) & !depends_on_u₂
721+
reduced_by_unrolling[1,id] = (u₁reached) & !depends_on_u₁
722+
reduced_by_unrolling[2,id] = (u₂reached) & !depends_on_u₂
721723
# @show op iter, unrolledu₂loopsym[:,id]
722724
iters[id] = iter
723725
innerloop ∈ loopdependencies(op) && set_upstream_family!(descendentsininnerloop, op, true)
@@ -736,7 +738,6 @@ function evaluate_cost_tile(
736738
continue
737739
end
738740
end
739-
# @show op rt, lat, rp
740741
rt, lat, rp = cost(ls, op, vectorized, Wshift, size_T)
741742
if isload(op) && !iszero(prefetchisagoodidea(ls, op, UnrollArgs(4, unrollsyms, 4, 0)))
742743
rt += 0.5VectorizationBase.REGISTER_SIZE / VectorizationBase.CACHELINE_SIZE
@@ -745,17 +746,22 @@ function evaluate_cost_tile(
745746
# @show isunrolled₁, isunrolled₂, op rt, lat, rp
746747
rp = (opisininnerloop && !(loadintostore(ls, op))) ? rp : zero(rp) # we only care about register pressure within the inner most loop
747748
# rp = opisininnerloop ? rp : zero(rp) # we only care about register pressure within the inner most loop
749+
rto = rt
748750
rt *= iters[id]
749751
if u₁reduces & u₂reduces
752+
# @show op 4, rto, iters[id], lat, rp
750753
cost_vec[4] += rt
751754
reg_pressure[4] += rp
752755
elseif u₂reduces # cost decreased by unrolling u₂loop
756+
# @show op 2, rto, iters[id], lat, rp
753757
cost_vec[2] += rt
754758
reg_pressure[2] += rp
755759
elseif u₁reduces # cost decreased by unrolling u₁loop
760+
# @show op 3, rto, iters[id], lat, rp
756761
cost_vec[3] += rt
757762
reg_pressure[3] += rp
758763
else # no cost decrease; cost must be repeated
764+
# @show op 1, rto, iters[id], lat, rp
759765
cost_vec[1] += rt
760766
reg_pressure[1] += rp
761767
end

0 commit comments

Comments
 (0)