Skip to content

Commit e3aff00

Browse files
committed
Threading tweaks for AArch64
1 parent d96705c commit e3aff00

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

src/codegen/lower_threads.jl

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,11 @@ end
159159
# ni = cld(N, fN)
160160
# block_per_m, blocks_per_n
161161
# end
162-
163-
@inline choose_num_threads(C::Float64, NT::UInt, x::Base.BitInteger) = _choose_num_threads(Base.FastMath.mul_float_fast(C, 0.05460264079015985), NT, x)
162+
if Sys.ARCH === :x86_64
163+
# Method defined when `Sys.ARCH === :x86_64`: pre-scale the cost `C` by the fixed
# factor 0.05460264079015985 (fast-math multiply), then defer to
# `_choose_num_threads`, passing `NT` and `x` through unchanged.
@inline function choose_num_threads(C::Float64, NT::UInt, x::Base.BitInteger)
    scaledcost = Base.FastMath.mul_float_fast(C, 0.05460264079015985)
    return _choose_num_threads(scaledcost, NT, x)
end
164+
else
165+
# Method defined when `Sys.ARCH` is anything other than `:x86_64`: same as the
# x86_64 method, except the fixed scale factor is multiplied by an extra 0.25
# before being applied to the cost `C`.
@inline function choose_num_threads(C::Float64, NT::UInt, x::Base.BitInteger)
    scaledcost = Base.FastMath.mul_float_fast(C, 0.05460264079015985 * 0.25)
    return _choose_num_threads(scaledcost, NT, x)
end
166+
end
164167
# Compute `min(ceil(C * sqrt(x)), NT)` as a `UInt`.
#   C  - Float64 multiplier applied to the square root of `x`
#   NT - upper bound on the returned value
#   x  - integer converted to Float64 with `uitofp`, i.e. its bits are read as unsigned
@inline function _choose_num_threads(C::Float64, NT::UInt, x::Base.BitInteger)
    root = Base.sqrt_llvm(Base.uitofp(Float64, x))
    scaled = Base.FastMath.mul_float_fast(C, root)
    # Round up before converting so any fractional amount counts as one more.
    wanted = Base.fptoui(UInt, Base.ceil_llvm(scaled))
    return min(wanted, NT)
end
165168
function push_loop_length_expr!(q::Expr, ls::LoopSet)
166169
l = 1
@@ -326,6 +329,9 @@ function thread_one_loops_expr(
326329
)
327330
looplen = looplengthprod(ls)
328331
c = 0.05460264079015985 * c / looplen
332+
if Sys.ARCH !== :x86_64
333+
c *= 0.25
334+
end
329335
if all(isstaticloop, ls.loops)
330336
_num_threads = _choose_num_threads(c, ntmax, Int64(looplen))::UInt
331337
_num_threads > 1 || return avx_body(ls, UNROLL)
@@ -450,6 +456,9 @@ function thread_two_loops_expr(
450456
)
451457
looplen = looplengthprod(ls)
452458
c = 0.05460264079015985 * c / looplen
459+
if Sys.ARCH !== :x86_64
460+
c *= 0.25
461+
end
453462
if all(isstaticloop, ls.loops)
454463
_num_threads = _choose_num_threads(c, ntmax, Int64(looplen))::UInt
455464
_num_threads > 1 || return avx_body(ls, UNROLL)
@@ -521,6 +530,7 @@ function thread_two_loops_expr(
521530
$loopstart1
522531
var"#loop#1#start#init#" = var"#iter#start#0#"
523532
$loopstart2
533+
# @show var"#nrequest#"
524534
var"##do#thread##" = var"#nrequest#" ≠ 0x00000000
525535
if var"##do#thread##"
526536
var"#threads#", var"#torelease#" = CheapThreads.request_threads(Threads.threadid(), var"#nrequest#")

0 commit comments

Comments
 (0)