Skip to content

Commit 02fca62

Browse files
committed
A few miscellaneous bug fixes.
1 parent ec09ec7 commit 02fca62

File tree

5 files changed

+30
-19
lines changed

5 files changed

+30
-19
lines changed

src/add_compute.jl

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -267,15 +267,19 @@ end
267267
function add_pow!(
268268
ls::LoopSet, var::Symbol, x, p::Real, elementbytes::Int, position::Int
269269
)
270-
xop = if x isa Expr
270+
xop::Operation = if x isa Expr
271271
add_operation!(ls, gensym(:xpow), x, elementbytes, position)
272272
elseif x isa Symbol
273-
xo = get(ls.opdict, x, nothing)
274-
if isnothing(xo)
275-
pushpreamble!(ls, Expr(:(=), var, Expr(:call, :(^), x, p)))
276-
return add_constant!(ls, var, elementbytes)
273+
if x ∈ ls.loopsymbols
274+
add_loopvalue!(ls, x, elementbytes)
275+
else
276+
xo = get(ls.opdict, x, nothing)
277+
if isnothing(xo)
278+
pushpreamble!(ls, Expr(:(=), var, Expr(:call, :(^), x, p)))
279+
return add_constant!(ls, var, elementbytes)
280+
end
281+
xo
277282
end
278-
xo
279283
elseif x isa Number
280284
pushpreamble!(ls, Expr(:(=), var, x ^ p))
281285
return add_constant!(ls, var, elementbytes)

src/broadcast.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ end
250250
pushfirst!(q.args, Expr(:meta,:inline))
251251
# @show q
252252
q
253-
# ls
253+
# ls
254254
end
255255
@generated function vmaterialize!(
256256
dest′::Union{Adjoint{T,A},Transpose{T,A}}, bc::BC, ::Val{Mod}

src/lowering.jl

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,21 @@ function lower_block(
8282
for prepost ∈ 1:2
8383
# !u₁ && !u₂
8484
lower!(blockq, ops[1,1,prepost,n], vectorized, ls, u₁loop, u₂loop, u₁, nothing, mask)
85+
if u₁ == 4
86+
lower!(blockq, ops[2,1,prepost,n], vectorized, ls, u₁loop, u₂loop, u₁, nothing, mask)
87+
end
8588
opsv1 = ops[1,2,prepost,n]
8689
opsv2 = ops[2,2,prepost,n]
8790
if length(opsv1) + length(opsv2) > 0
91+
# if u₁ == 3
92+
# lower!(blockq, ops[2,1,prepost,n], vectorized, ls, u₁loop, u₂loop, u₁, nothing, mask)
93+
# end
8894
for store ∈ (false,true)
8995
# let store = nothing
9096
nstores = 0
9197
iszero(length(opsv1)) || (nstores += sum(isstore, opsv1))
9298
iszero(length(opsv2)) || (nstores += sum(isstore, opsv2))
99+
93100
for t 0:u₂-1
94101
if t == 0
95102
push!(blockq.args, Expr(:(=), u₂loop, tiledsym(u₂loop)))
@@ -103,7 +110,7 @@ function lower_block(
103110
else # !u₁ && u₂
104111
lower!(blockq, opsv1, vectorized, ls, u₁loop, u₂loop, u₁, t, mask, store)
105112
end
106-
if iszero(t) && !store # u₁ && !u₂
113+
if iszero(t) && !store && u₁ != 4 # u₁ && !u₂
107114
# for u ∈ 0:u₁-1
108115
lower!(blockq, ops[2,1,prepost,n], vectorized, ls, u₁loop, u₂loop, u₁, nothing, mask)
109116
# end
@@ -120,7 +127,7 @@ function lower_block(
120127
end
121128
nstores == 0 && break
122129
end
123-
else
130+
elseif u₁ != 4
124131
# for u ∈ 0:u₁-1 # u₁ && !u₂
125132
lower!(blockq, ops[2,1,prepost,n], vectorized, ls, u₁loop, u₂loop, u₁, nothing, mask)
126133
# end
@@ -303,13 +310,13 @@ function determine_width(ls::LoopSet, vectorized::Symbol)
303310
push!(vwidth_q.args, Expr(:call, Expr(:curly, :Val, length(vloop))))
304311
end
305312
# push!(vwidth_q.args, ls.T)
306-
if length(ls.includedactualarrays) < 2
307-
push!(vwidth_q.args, ls.T)
308-
else
309-
for array ∈ ls.includedactualarrays
310-
push!(vwidth_q.args, Expr(:call, :eltype, array))
311-
end
312-
end
313+
# if length(ls.includedactualarrays) < 2
314+
push!(vwidth_q.args, ls.T)
315+
# else
316+
# for array ∈ ls.includedactualarrays
317+
# push!(vwidth_q.args, Expr(:call, :eltype, array))
318+
# end
319+
# end
313320
vwidth_q
314321
end
315322
function init_remblock(unrolledloop::Loop, u₁loop::Symbol = unrolledloop.itersymbol)
@@ -353,8 +360,8 @@ function setup_preamble!(ls::LoopSet, us::UnrollSpecification)
353360
W = ls.W; typeT = ls.T
354361
if length(ls.includedarrays) > 0
355362
push!(ls.preamble.args, Expr(:(=), typeT, determine_eltype(ls)))
356-
push!(ls.preamble.args, Expr(:(=), W, determine_width(ls, vectorized)))
357363
end
364+
push!(ls.preamble.args, Expr(:(=), W, determine_width(ls, vectorized)))
358365
lower_licm_constants!(ls)
359366
pushpreamble!(ls, definemask(getloop(ls, vectorized), W))#, u₁ > 1 && u₁loopnum == vectorizedloopnum))
360367
for op operations(ls)

src/memory_ops_common.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ function checkforoffset!(
8787
factor = f === :+ ? 1 : -1
8888
arg1 = ind.args[2]
8989
arg2 = ind.args[3]
90-
if arg1 isa Integer
90+
if arg1 isa Integer && isone(factor)
9191
if arg2 isa Symbol && arg2 ∈ ls.loopsymbols
9292
addoffset!(ls, indices, offsets, loopedindex, loopdependencies, arg2, arg1 * factor)
9393
else

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ function avx_loopset(instr, ops, arf, AM, LPSYM, LB, @nospecialize(vargs))
388388
expandedv = [isexpanded(ls, ops, nopsv, i) for i ∈ eachindex(ops)]
389389
mrefs = create_mrefs!(ls, arf, arraysymbolinds, opsymbols, nopsv, expandedv, vargs)
390390
pushpreamble!(ls, Expr(:(=), ls.T, Expr(:call, :promote_type, [Expr(:call, :eltype, vptr(mref)) for mref ∈ mrefs]...)))
391-
pushpreamble!(ls, Expr(:(=), ls.W, Expr(:call, lv(:pick_vector_width_val), [Expr(:call, :eltype, vptr(mref)) for mref ∈ mrefs]...)))
391+
# pushpreamble!(ls, Expr(:(=), ls.W, Expr(:call, lv(:pick_vector_width_val), [Expr(:call, :eltype, vptr(mref)) for mref ∈ mrefs]...)))
392392
num_params = num_arrays + num_parameters(AM)
393393
add_ops!(ls, instr, ops, mrefs, opsymbols, num_params, nopsv, expandedv, elementbytes)
394394
process_metadata!(ls, AM, length(arf))

0 commit comments

Comments
 (0)