Skip to content

Commit 000b22a

Browse files
authored
Fix type instability in multithreaded reductions (#269)
* Fix type instability. * Bump version
1 parent 526d990 commit 000b22a

File tree

3 files changed

+12
-16
lines changed

3 files changed

+12
-16
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.22"
4+
version = "0.12.23"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_threads.jl

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,14 @@ end
2525
offset = ThreadingUtilities.store!(p, args, offset)
2626
nothing
2727
end
28+
29+
struct StaticType{T} end
30+
@inline gettype(::StaticType{T}) where {T} = T
31+
2832
@inline function avx_launch(
29-
::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, lbvargs::LBV, tid
30-
) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV<:Tuple{Vararg{Any,K}}}
31-
fargs = flatten_to_tuple(lbvargs)
32-
ThreadingUtilities.launch(setup_avx_threads!, tid, pointer(AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,typeof(fargs)}()), fargs)
33+
::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, ::StaticType{LBV}, fargs::FARGS, tid
34+
) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV<:Tuple{Vararg{Any,K}},FARGS}
35+
ThreadingUtilities.launch(setup_avx_threads!, tid, pointer(AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,FARGS}()), fargs)
3336
end
3437

3538
# function approx_cbrt(x)
@@ -397,10 +400,8 @@ function thread_one_loops_expr(
397400
$iterstop
398401
var"#thread#id#" = vadd_nw(var"#thread#id#", var"#trailzing#zeros#")
399402

400-
avx_launch(
401-
Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM,
402-
($loopboundexpr, var"#vargs#"), var"#thread#id#"
403-
)
403+
var"##lbvargs#to_launch##" = ($loopboundexpr, var"#vargs#")
404+
avx_launch(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), flatten_to_tuple(var"##lbvargs#to_launch##"), var"#thread#id#")
404405

405406
var"#thread#mask#" >>>= var"#trailzing#zeros#"
406407

@@ -587,10 +588,8 @@ function thread_two_loops_expr(
587588
$iterstop2
588589
var"#thread#id#" = vadd_nw(var"#thread#id#", var"#trailzing#zeros#")
589590
# @show var"#thread#id#" $loopboundexpr
590-
avx_launch(
591-
Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM,
592-
($loopboundexpr, var"#vargs#"), var"#thread#id#"
593-
)
591+
var"##lbvargs#to_launch##" = ($loopboundexpr, var"#vargs#")
592+
avx_launch(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), flatten_to_tuple(var"##lbvargs#to_launch##"), var"#thread#id#")
594593
var"#thread#mask#" >>>= var"#trailzing#zeros#"
595594

596595
var"##end#inner##" = var"#thread#launch#count#0#" == vsub_nw(var"#thread#factor#0#", 0x00000001)

src/condense_loopset.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
Base.:|(u::Unsigned, it::IndexType) = u | UInt8(it)
44
Base.:(==)(u::Unsigned, it::IndexType) = (u % UInt8) == UInt8(it)
55

6-
struct StaticType{T} end
7-
@inline gettype(::StaticType{T}) where {T} = T
8-
96
function _append_fields!(t::Expr, body::Expr, sym::Symbol, ::Type{T}) where {T}
107
gf = GlobalRef(Core,:getfield)
118
for f ∈ 1:fieldcount(T)

0 commit comments

Comments
 (0)