@@ -165,11 +165,17 @@ end
165
165
# block_per_m, blocks_per_n
166
166
# end
167
167
"""
    choose_num_threads(C, NT, x)

Multiply `C` by a fixed, architecture-dependent factor and forward the result,
together with `NT` and `x`, to `_choose_num_threads`.

The factor is `0.05460264079015985` when `Sys.ARCH === :x86_64` and one quarter
of that value on every other architecture. All arithmetic is carried out in the
floating-point type `T` of `C` (`Float32` or `Float64`).
"""
@inline function choose_num_threads(
    C::T, NT::UInt, x::Base.BitInteger
) where {T<:Union{Float32,Float64}}
    base_factor = T(0.05460264079015985)
    # `Sys.ARCH` is a constant, so only one arm of this branch survives compilation.
    factor = Sys.ARCH === :x86_64 ? base_factor : base_factor * T(0.25)
    return _choose_num_threads(Base.mul_float_fast(C, factor), NT, x)
end
176
"""
    _choose_num_threads(C, NT, x)

Return `min(ceil(C * sqrt(x)), NT)` as a `UInt`.

`x` is converted to the floating-point type `T` of `C` (`Float32` or `Float64`)
with an unsigned-integer-to-float conversion, and the product uses fast-math
intrinsics.
"""
@inline function _choose_num_threads(
    C::T, NT::UInt, x::Base.BitInteger
) where {T<:Union{Float32,Float64}}
    xf = Base.uitofp(T, x)
    scaled = Base.mul_float_fast(C, Base.sqrt_llvm_fast(xf))
    wanted = Base.fptoui(UInt, Base.ceil_llvm(scaled))
    # `NT` acts as the upper bound on the returned value.
    return min(wanted, NT)
end
172
- @inline _choose_num_threads (C:: Float64 , NT:: UInt , x:: Base.BitInteger ) = min (Base. fptoui (UInt, Base. ceil_llvm (Base. mul_float_fast (C, Base. sqrt_llvm (Base. uitofp (Float64, x))))), NT)
173
179
function push_loop_length_expr! (q:: Expr , ls:: LoopSet )
174
180
l = 1
175
181
ndynamic = 0
@@ -342,7 +348,7 @@ function thread_one_loops_expr(
342
348
_num_threads > 1 || return avx_body (ls, UNROLL)
343
349
choose_nthread = Expr (:(= ), Symbol (" #nthreads#" ), _num_threads)
344
350
else
345
- choose_nthread = :(_choose_num_threads ($ c , $ ntmax))
351
+ choose_nthread = :(_choose_num_threads ($ ( Float32 (c)) , $ ntmax))
346
352
push_loop_length_expr! (choose_nthread, ls)
347
353
choose_nthread = Expr (:(= ), Symbol (" #nthreads#" ), choose_nthread)
348
354
end
@@ -474,7 +480,7 @@ function thread_two_loops_expr(
474
480
_num_threads > 1 || return avx_body (ls, UNROLL)
475
481
choose_nthread = Expr (:(= ), Symbol (" #nthreads#" ), _num_threads)
476
482
else
477
- choose_nthread = :(_choose_num_threads ($ c , $ ntmax))
483
+ choose_nthread = :(_choose_num_threads ($ ( Float32 (c)) , $ ntmax))
478
484
push_loop_length_expr! (choose_nthread, ls)
479
485
choose_nthread = Expr (:(= ), Symbol (" #nthreads#" ), choose_nthread)
480
486
end
0 commit comments