|
25 | 25 | offset = ThreadingUtilities.store!(p, args, offset)
|
26 | 26 | nothing
|
27 | 27 | end
|
| 28 | + |
| 29 | +struct StaticType{T} end |
| 30 | +@inline gettype(::StaticType{T}) where {T} = T |
| 31 | + |
28 | 32 | @inline function avx_launch(
|
29 |
| - ::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, lbvargs::LBV, tid |
30 |
| -) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV<:Tuple{Vararg{Any,K}}} |
31 |
| - fargs = flatten_to_tuple(lbvargs) |
32 |
| - ThreadingUtilities.launch(setup_avx_threads!, tid, pointer(AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,typeof(fargs)}()), fargs) |
| 33 | + ::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, ::StaticType{LBV}, fargs::FARGS, tid |
| 34 | +) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV<:Tuple{Vararg{Any,K}},FARGS} |
| 35 | + ThreadingUtilities.launch(setup_avx_threads!, tid, pointer(AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,FARGS}()), fargs) |
33 | 36 | end
|
34 | 37 |
|
35 | 38 | # function approx_cbrt(x)
|
@@ -397,10 +400,8 @@ function thread_one_loops_expr(
|
397 | 400 | $iterstop
|
398 | 401 | var"#thread#id#" = vadd_nw(var"#thread#id#", var"#trailzing#zeros#")
|
399 | 402 |
|
400 |
| - avx_launch( |
401 |
| - Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, |
402 |
| - ($loopboundexpr, var"#vargs#"), var"#thread#id#" |
403 |
| - ) |
| 403 | + var"##lbvargs#to_launch##" = ($loopboundexpr, var"#vargs#") |
| 404 | + avx_launch(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), flatten_to_tuple(var"##lbvargs#to_launch##"), var"#thread#id#") |
404 | 405 |
|
405 | 406 | var"#thread#mask#" >>>= var"#trailzing#zeros#"
|
406 | 407 |
|
@@ -587,10 +588,8 @@ function thread_two_loops_expr(
|
587 | 588 | $iterstop2
|
588 | 589 | var"#thread#id#" = vadd_nw(var"#thread#id#", var"#trailzing#zeros#")
|
589 | 590 | # @show var"#thread#id#" $loopboundexpr
|
590 |
| - avx_launch( |
591 |
| - Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, |
592 |
| - ($loopboundexpr, var"#vargs#"), var"#thread#id#" |
593 |
| - ) |
| 591 | + var"##lbvargs#to_launch##" = ($loopboundexpr, var"#vargs#") |
| 592 | + avx_launch(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), flatten_to_tuple(var"##lbvargs#to_launch##"), var"#thread#id#") |
594 | 593 | var"#thread#mask#" >>>= var"#trailzing#zeros#"
|
595 | 594 |
|
596 | 595 | var"##end#inner##" = var"#thread#launch#count#0#" == vsub_nw(var"#thread#factor#0#", 0x00000001)
|
|
0 commit comments