Skip to content

Commit 0586abc

Browse files
committed
Faster thread ramp up again
1 parent 9aa0211 commit 0586abc

File tree

1 file changed

+10
-14
lines changed

1 file changed

+10
-14
lines changed

src/codegen/lower_threads.jl

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,6 @@ function thread_loop_summary!(ls::LoopSet, ua::UnrollArgs, threadedloop::Loop, i
246246
:($lensym = $((threadedloop.lensym)) % UInt)
247247
end
248248
unroll_factor = Core.ifelse(threadedloop === vloop, W, 1)
249-
# if threadedloop === u₁loop
250-
# unroll_factor *= u₁
251-
# elseif threadedloop === u₂loop
252-
# unroll_factor *= u₂
253-
# end
254249
num_unroll_sym = Symbol("#num#unrolls#thread#$threadloopnumtag#")
255250
define_num_unrolls = if unroll_factor == 1
256251
:($num_unroll_sym = $lensym)
@@ -334,15 +329,19 @@ function define_block_size(threadedloop, vloop, tn, W)
334329
end
335330
end
336331
end
332+
"""
    scale_cost(c, looplen)

Return the cost estimate `c` rescaled as `0.05 * c / looplen`, multiplied by an
additional `0.25` when `Sys.ARCH` is not `:x86_64`.

Both `thread_one_loops_expr` and `thread_two_loops_expr` call this with
`looplen = looplengthprod(ls)` and pass the result on to `_choose_num_threads`.

# Arguments
- `c`: raw cost value to rescale.
- `looplen`: divisor applied to the cost; callers pass the product of loop lengths.

# Returns
The rescaled cost (a floating-point value for real-valued inputs).
"""
function scale_cost(c, looplen)
    scaled = 0.05 * c / looplen
    # Targets other than x86_64 get the scaled cost reduced by a further factor of 4.
    # NOTE(review): the 0.05 and 0.25 constants look empirically tuned — confirm
    # against benchmarks before changing them.
    if Sys.ARCH !== :x86_64
        scaled *= 0.25
    end
    return scaled
end
337339
function thread_one_loops_expr(
338340
ls::LoopSet, ua::UnrollArgs, valid_thread_loop::Vector{Bool}, ntmax::UInt, c::Float64,
339341
UNROLL::Tuple{Bool,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt}, OPS::Expr, ARF::Expr, AM::Expr, LPSYM::Expr
340342
)
341343
looplen = looplengthprod(ls)
342-
c = 0.0225 * c / looplen
343-
if Sys.ARCH !== :x86_64
344-
c *= 0.25
345-
end
344+
c = scale_cost(c, looplen)
346345
if all(isstaticloop, ls.loops)
347346
_num_threads = _choose_num_threads(c, ntmax, Int64(looplen))::UInt
348347
_num_threads > 1 || return avx_body(ls, UNROLL)
@@ -376,7 +375,6 @@ function thread_one_loops_expr(
376375
nothing
377376
end
378377
retexpr = length(ls.outer_reductions) > 0 ? :(return $retv) : :(return nothing)
379-
# @unpack u₁loop, u₂loop, vloop, u₁, u₂max = ua
380378
iterdef = define_block_size(threadedloop, ua.vloop, 0, ls.vector_width)
381379
q = quote
382380
$choose_nthread # UInt
@@ -479,10 +477,8 @@ function thread_two_loops_expr(
479477
UNROLL::Tuple{Bool,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt}, OPS::Expr, ARF::Expr, AM::Expr, LPSYM::Expr
480478
)
481479
looplen = looplengthprod(ls)
482-
c = 0.0225 * c / looplen
483-
if Sys.ARCH !== :x86_64
484-
c *= 0.25
485-
end
480+
# Previously inlined here as `c = 0.0225 * c / looplen`; the scaling now lives in `scale_cost`, which uses a factor of 0.05.
481+
c = scale_cost(c, looplen)
486482
if all(isstaticloop, ls.loops)
487483
_num_threads = _choose_num_threads(c, ntmax, Int64(looplen))::UInt
488484
_num_threads > 1 || return avx_body(ls, UNROLL)

0 commit comments

Comments
 (0)