@@ -114,6 +114,7 @@ function cost(ls::LoopSet, op::Operation, (u₁,u₂)::Tuple{Symbol,Symbol}, vlo
114
114
# Also, once more SVE (scalable vector extension) CPUs are released, it would be nice to know if
115
115
# this feature is common to all of them.
116
116
srt += 0.5 reg_size (ls) / cache_lnsze (ls)
117
+ # srt += 0.25reg_size(ls) / cache_lnsze(ls)
117
118
end
118
119
elseif isstore (op) # broadcast or reductionstore; if store we want to penalize reduction
119
120
srt *= 3
@@ -798,6 +799,7 @@ function load_elimination_cost_factor!(
798
799
@unpack u₁loopsym, u₂loopsym, vloopsym = unrollsyms
799
800
if ! iszero (first (isoptranslation (ls, op, unrollsyms)))
800
801
rt, lat, rp = cost (ls, op, (u₁loopsym, u₂loopsym), vloopsym, Wshift, size_T)
802
+ # rt = Core.ifelse(isvectorized(op), 0.5rt, rt)
801
803
rto = rt
802
804
rt *= iters
803
805
# rt *= factor1; rp *= factor2;
@@ -818,7 +820,9 @@ function load_elimination_cost_factor!(
818
820
# # (0.25, dynamic_register_count() == 32 ? 1.2 : 1.0)
819
821
# (0.25, 1.0)
820
822
# cost_vec[1] -= rt
821
- cost_vec[1 ] -= 0.5625 * iters
823
+ # cost_vec[1] -= 0.5625 * iters
824
+ # cost_vec[1] -= 0.5625 * iters / 2
825
+ # @show rto, 0.8rt, op
822
826
reg_pressure[1 ] += 0.25 rp
823
827
cost_vec[2 ] += rt
824
828
reg_pressure[2 ] += rp
@@ -924,7 +928,7 @@ function evaluate_cost_tile(ls::LoopSet, order::Vector{Symbol}, unrollsyms::Unro
924
928
nops = length (operations (ls))
925
929
iters = Vector {Float64} (undef, nops)
926
930
reduced_by_unrolling = Array {Bool} (undef, 2 , 2 , nops)
927
- evaluate_cost_tile! (iters, reduced_bu_unrolling , ls, order, unrollsyms, anyisbit)
931
+ evaluate_cost_tile! (iters, reduced_by_unrolling , ls, order, unrollsyms, anyisbit)
928
932
end
929
933
function evaluate_cost_tile! (
930
934
iters:: Vector{Float64} , reduced_by_unrolling:: Array{Bool,3} , ls:: LoopSet , order:: Vector{Symbol} , unrollsyms:: UnrollSymbols , anyisbit:: Bool
@@ -1062,6 +1066,7 @@ function evaluate_cost_tile!(
1062
1066
rp = opisininnerloop ? rp : zero (rp) # we only care about register pressure within the inner most loop
1063
1067
rto = rt
1064
1068
rt *= iters[id]
1069
+ # @show (u₁reducesrt, u₂reducesrt), rto, rt, lat, rp, op
1065
1070
if isstore (op) & (! u₁reducesrt) & (! u₂reducesrt)
1066
1071
irreducible_storecosts += rt
1067
1072
end
0 commit comments