Skip to content

Commit eafd1d2

Browse files
committed
A couple bug fixes.
1 parent 2768d72 commit eafd1d2

File tree

3 files changed

+24
-13
lines changed

3 files changed

+24
-13
lines changed

src/broadcast.jl

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -100,8 +100,7 @@ function add_broadcast!(
100100
end
101101

102102
# size of dest determines loops
103-
# @generated
104-
function vmaterialize!(
103+
@generated function vmaterialize!(
105104
dest::AbstractArray{T,N}, bc::BC
106105
# ) where {T, N, BC <: Broadcasted}
107106
) where {N, T, BC <: Broadcasted}
@@ -119,8 +118,11 @@ function vmaterialize!(
119118
add_broadcast!(ls, :dest, :bc, loopsyms, BC)
120119
add_store!(ls, :dest, ArrayReference(:dest, loopsyms, Ref{Bool}(false)))
121120
resize!(ls.loop_order, num_loops(ls)) # num_loops may be greater than N, eg Product
122-
# lower(ls)
123-
ls
121+
q = lower(ls)
122+
push!(q.args, :dest)
123+
pushfirst!(q.args, Expr(:meta,:inline))
124+
q
125+
# ls
124126
end
125127

126128
function vmaterialize(bc::Broadcasted)

src/lowering.jl

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -646,19 +646,21 @@ function lower_tiled(ls::LoopSet, U::Int, T::Int)
646646
q = Expr(:block, Expr(:(=), mangledtiled, 0))
647647
# we build up the loop expression.
648648
Trem = Tt = T
649-
nloops = num_loops(ls); addtileonly = sum(length, @view(oporder(ls)[:,:,:,:,end])) > 0
649+
nloops = num_loops(ls);
650+
# addtileonly = sum(length, @view(oporder(ls)[:,:,:,:,end])) > 0
650651
Texprtype = (static_tile && tiled_iter < 2T) ? :block : :while
651652
while Tt > 0
652-
tiledloopbody = if addtileonly
653-
tiledloopbody = lower_nest(ls, nloops, U, T, nothing, 0, W, nothing, :block)
654-
else
655-
Expr(:block, Expr(:(=), unrolled, 0))
656-
end
657-
push!(q.args, Texprtype === :block ? tiledloopbody : Expr(Texprtype, looprange(ls, tiled, Tt, tiledsym(tiled)), tiledloopbody))
653+
#
654+
tiledloopbody = Expr(:block, )
655+
# else
656+
# Expr(:block, Expr(:(=), unrolled, 0))
657+
# end
658658
lower_unrolled!(tiledloopbody, ls, U, Tt, W, static_unroll, unrolled_iter, unrolled_itersym)
659+
tiledloopbody = lower_nest(ls, nloops, U, T, tiledloopbody, 0, W, nothing, :block)
660+
push!(q.args, Texprtype === :block ? tiledloopbody : Expr(Texprtype, looprange(ls, tiled, Tt, tiledsym(tiled)), tiledloopbody))
659661
if static_tile
660662
Tt = if Tt == T
661-
push!(tiledloopbody.args, Expr(:+=, mangledtiled, Tt))
663+
# push!(tiledloopbody.args, Expr(:+=, mangledtiled, Tt))
662664
Texprtype = :block
663665
looprangehint(ls, tiled) % T
664666
else
@@ -668,7 +670,7 @@ function lower_tiled(ls::LoopSet, U::Int, T::Int)
668670
else
669671
Ttold = Tt
670672
Tt >>>= 1
671-
Tt == 0 || push!(tiledloopbody.args, Expr(:+=, mangledtiled, Ttold))
673+
# Tt == 0 || push!(tiledloopbody.args, Expr(:+=, mangledtiled, Ttold))
672674
Texprtype = 2Tt == Ttold ? :if : :while
673675
nothing
674676
end

test/runtests.jl

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -407,6 +407,13 @@ myvaravx!(x2, A, x̄)
407407
@benchmark myvaravx!($x2, $A, $x̄)
408408

409409

410+
a = rand(37); B = rand(37, 47); c = rand(47)';
411+
412+
d1 = @. a + B * c;
413+
d2 = @avx @. a + B * c;
414+
415+
@test all(d1 .≈ d2)
416+
410417
using SIMDPirates
411418
function mycolsum2!(
412419
means::AbstractVector{T}, sample::AbstractArray{T}

0 commit comments

Comments
 (0)