Skip to content

Commit 6b21805

Browse files
committed
Small number of bug fixes.
1 parent 8657d6b commit 6b21805

File tree

4 files changed

+9
-4
lines changed

4 files changed

+9
-4
lines changed

src/add_compute.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ function add_compute!(
204204
if mpref == argref
205205
if varname(mpref) === var
206206
reduction_ind = ind
207-
add_load!(ls, argref, elementbytes)
207+
mergesetv!(deps, loopdependencies(add_load!(ls, argref, elementbytes)))
208208
else
209209
pushparent!(vparents, deps, reduceddeps, add_load!(ls, argref, elementbytes))
210210
end

src/add_constants.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@ function add_constant!(ls::LoopSet, mpref::ArrayReferenceMetaPosition, elementby
3939
op = Operation(length(operations(ls)), varname(mpref), elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS, mpref.mref)
4040
add_vptr!(ls, op)
4141
temp = gensym(:intermediateconstref)
42-
pushpreamble!(ls, Expr(:(=), temp, Expr(:call, lv(:vload), mpref.mref.ptr, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), nothing)))))
42+
vloadcall = Expr(:call, lv(:vload), mpref.mref.ptr)
43+
if length(getindices(op)) > 0
44+
push!(vloadcall.args, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), nothing)))
45+
end
46+
pushpreamble!(ls, Expr(:(=), temp, vloadcall))
4347
pushpreamble!(ls, op, temp)
4448
pushop!(ls, op, temp)
4549
end

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ function _avx_loopset(OPSsv, ARFsv, AMsv, LPSYMsv, LBsv, vargs)
414414
)
415415
end
416416
@generated function _avx_!(::Val{UT}, ::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM}, lb::LB, vargs...) where {UT, OPS, ARF, AM, LPSYM, LB}
417-
1 + 1 # Irrelevant line you can comment out/in to force recompilation...
417+
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
418418
ls = _avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LPSYM.parameters, LB.parameters, vargs)
419419
avx_body(ls, UT)
420420
end

src/split_loops.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ function lower_and_split_loops(ls::LoopSet)
7070
remaining_ops[1:ind-1] .= @view(split_candidates[1:ind-1]); remaining_ops[ind:end] .= @view(split_candidates[ind+1:end])
7171
ls_2 = split_loopset(ls, remaining_ops)
7272
order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, cost_2 = choose_order_cost(ls_2)
73-
U_1 = T_1 = U_2 = T_2 = 2
73+
# U_1 = T_1 = U_2 = T_2 = 2
7474
if cost_1 + cost_2 < cost_fused
75+
# @show cost_1, cost_2 cost_fused
7576
ls_2_lowered = if length(remaining_ops) > 1
7677
lower_and_split_loops(ls_2)
7778
else

0 commit comments

Comments
 (0)