Skip to content

Commit ce717a1

Browse files
committed
Accidentally bumped version too high, and added support for transposed destinations.
1 parent fa0505b commit ce717a1

File tree

3 files changed

+40
-3
lines changed

3 files changed

+40
-3
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.1.5"
4+
version = "0.1.4"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

src/broadcast.jl

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,11 @@ end
161161
@generated function vmaterialize!(
162162
dest::AbstractArray{T,N}, bc::BC
163163
) where {T <: Union{Float32,Float64}, N, BC <: Broadcasted}
164-
# ) where {N, T, BC <: Broadcasted}
165164
# we have an N dimensional loop.
166165
# need to construct the LoopSet
167166
loopsyms = [gensym(:n) for n ∈ 1:N]
168167
ls = LoopSet()
169-
sizes = Expr(:tuple,)
168+
sizes = Expr(:tuple)
170169
for (n,itersym) ∈ enumerate(loopsyms)
171170
Nsym = gensym(:N)
172171
ls.loops[itersym] = Loop(itersym, Nsym)
@@ -183,6 +182,31 @@ end
183182
q
184183
# ls
185184
end
185+
@generated function vmaterialize!(
186+
dest′::Union{Adjoint{T,A},Transpose{T,A}}, bc::BC
187+
) where {T <: Union{Float32,Float64}, N, A <: AbstractArray{T,N}, BC <: Broadcasted}
188+
# we have an N dimensional loop.
189+
# need to construct the LoopSet
190+
loopsyms = [gensym(:n) for n ∈ 1:N]
191+
ls = LoopSet()
192+
pushpreamble!(ls, Expr(:(=), :dest, Expr(:call, :parent, :dest′)))
193+
sizes = Expr(:tuple)
194+
for (n,itersym) ∈ enumerate(loopsyms)
195+
Nsym = gensym(:N)
196+
ls.loops[itersym] = Loop(itersym, Nsym)
197+
push!(sizes.args, Nsym)
198+
end
199+
pushpreamble!(ls, Expr(:(=), sizes, Expr(:call, :size, :dest′)))
200+
elementbytes = sizeof(T)
201+
add_broadcast!(ls, :dest, :bc, loopsyms, BC, elementbytes)
202+
add_store!(ls, :dest, ArrayReference(:dest, reverse(loopsyms), Ref{Bool}(false)), elementbytes)
203+
resize!(ls.loop_order, num_loops(ls)) # num_loops may be greater than N, eg Product
204+
q = lower(ls)
205+
push!(q.args, :dest′)
206+
pushfirst!(q.args, Expr(:meta,:inline))
207+
q
208+
# ls
209+
end
186210

187211
function vmaterialize(bc::Broadcasted)
188212
ElType = Base.Broadcast.combine_eltypes(bc.f, bc.args)

test/runtests.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,19 @@ end
367367
@. D3 = exp(Bt)
368368
@avx @. D4 = exp(Bt)
369369
@test D3 ≈ D4
370+
371+
D1 = similar(B); D2 = similar(B)
372+
D1t = Transpose(D1)
373+
D2t = Transpose(D2)
374+
@. D1t = exp(Bt)
375+
@avx @. D2t = exp(Bt)
376+
@test D1t ≈ D2t
377+
378+
fill!(D1, -1e3)
379+
fill!(D2, 9e9)
380+
@. D1' = exp(Bt)
381+
@avx @. D2' = exp(Bt)
382+
@test D1 ≈ D2
370383
end
371384
end
372385

0 commit comments

Comments
 (0)