@@ -684,7 +684,7 @@ function evaluate_cost_tile(
684
684
reg_pressure = reg_pres_buf (ls)
685
685
# @inbounds reg_pressure[2] = 1
686
686
# @inbounds reg_pressure[3] = 1
687
- iter:: Int = 1
687
+ iter:: Float64 = 1.0
688
688
u₁reached = u₂reached = false
689
689
choose_to_inline = Ref (false )
690
690
copyto! (names (ls), order); reverse! (names (ls))
@@ -716,8 +716,10 @@ function evaluate_cost_tile(
716
716
depends_on_u₁ = isu₁unrolled (op)
717
717
depends_on_u₂ = isu₂unrolled (op)
718
718
# cost is reduced by unrolling u₁ if it is interior to u₁loop (true if either u₁reached, or if depends on u₂ [or u₁]) and doesn't depend on u₁
719
- reduced_by_unrolling[1 ,id] = (u₁reached | depends_on_u₂) & ! depends_on_u₁
720
- reduced_by_unrolling[2 ,id] = (u₂reached | depends_on_u₁) & ! depends_on_u₂
719
+ # reduced_by_unrolling[1,id] = (u₁reached | depends_on_u₂) & !depends_on_u₁
720
+ # reduced_by_unrolling[2,id] = (u₂reached | depends_on_u₁) & !depends_on_u₂
721
+ reduced_by_unrolling[1 ,id] = (u₁reached) & ! depends_on_u₁
722
+ reduced_by_unrolling[2 ,id] = (u₂reached) & ! depends_on_u₂
721
723
# @show op iter, unrolledu₂loopsym[:,id]
722
724
iters[id] = iter
723
725
innerloop ∈ loopdependencies (op) && set_upstream_family! (descendentsininnerloop, op, true )
@@ -736,7 +738,6 @@ function evaluate_cost_tile(
736
738
continue
737
739
end
738
740
end
739
- # @show op rt, lat, rp
740
741
rt, lat, rp = cost (ls, op, vectorized, Wshift, size_T)
741
742
if isload (op) && ! iszero (prefetchisagoodidea (ls, op, UnrollArgs (4 , unrollsyms, 4 , 0 )))
742
743
rt += 0.5 VectorizationBase. REGISTER_SIZE / VectorizationBase. CACHELINE_SIZE
@@ -745,17 +746,22 @@ function evaluate_cost_tile(
745
746
# @show isunrolled₁, isunrolled₂, op rt, lat, rp
746
747
rp = (opisininnerloop && ! (loadintostore (ls, op))) ? rp : zero (rp) # we only care about register pressure within the inner most loop
747
748
# rp = opisininnerloop ? rp : zero(rp) # we only care about register pressure within the inner most loop
749
+ rto = rt
748
750
rt *= iters[id]
749
751
if u₁reduces & u₂reduces
752
+ # @show op 4, rto, iters[id], lat, rp
750
753
cost_vec[4 ] += rt
751
754
reg_pressure[4 ] += rp
752
755
elseif u₂reduces # cost decreased by unrolling u₂loop
756
+ # @show op 2, rto, iters[id], lat, rp
753
757
cost_vec[2 ] += rt
754
758
reg_pressure[2 ] += rp
755
759
elseif u₁reduces # cost decreased by unrolling u₁loop
760
+ # @show op 3, rto, iters[id], lat, rp
756
761
cost_vec[3 ] += rt
757
762
reg_pressure[3 ] += rp
758
763
else # no cost decrease; cost must be repeated
764
+ # @show op 1, rto, iters[id], lat, rp
759
765
cost_vec[1 ] += rt
760
766
reg_pressure[1 ] += rp
761
767
end
0 commit comments