Skip to content

Commit 39dc532

Browse files
committed
Fix bug where the LHS ref was used instead of the RHS ref on one-line load-stores, and insert an extra symbol with a new name so that they have separate entries in syms_aliasing_refs/refs_aliasing_syms. Fixes #35.
1 parent 32bbaf4 commit 39dc532

File tree

6 files changed

+53
-14
lines changed

6 files changed

+53
-14
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ os:
44
- linux
55
- osx
66
julia:
7+
- 1.1
78
- 1.3
89
- nightly
910
matrix:

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Parameters = "0"
1717
SIMDPirates = "0.3"
1818
SLEEFPirates = "0.3"
1919
VectorizationBase = "0.2"
20-
julia = "1.3"
20+
julia = "1.1"
2121

2222
[extras]
2323
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

benchmark/driver.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ AtmulB_bench = fetch(AtmulB_future)
4747
AmulBt_bench = fetch(AmulBt_future)
4848
Atmulvb_bench = fetch(Atmulvb_future)
4949

50-
v = 3
50+
v = 4
5151
const PICTURES = "/home/chriselrod/Pictures"
5252
save(joinpath(PICTURES, "bench_gemm_v$v.png"), plot(gemm_bench));
5353
save(joinpath(PICTURES, "bench_AtmulB_v$v.png"), plot(AtmulB_bench));

src/add_stores.jl

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,43 @@ function add_store!(ls::LoopSet, op::Operation)
1313
id = op.identifier
1414
id == nops ? add_unique_store!(ls, op) : cse_store!(ls, op)
1515
end
16+
function add_copystore!(
17+
ls::LoopSet, parent::Operation, mpref::ArrayReferenceMetaPosition, elementbytes::Int
18+
)
19+
op = add_compute!(ls, gensym(), :identity, [parent], elementbytes)
20+
add_store!(ls, name(op), mpref, elementbytes, op)
21+
end
22+
1623
function add_store!(
17-
ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes::Int = 8
24+
ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes::Int = 8, parent = getop(ls, var, mpref.loopdependencies, elementbytes)
1825
)
26+
isload(parent) && return add_copystore!(ls, parent, mpref, elementbytes)
1927
parents = mpref.parents
2028
ldref = mpref.loopdependencies
2129
reduceddeps = mpref.reduceddeps
22-
parent = getop(ls, var, ldref, elementbytes)
23-
pvar = parent.variable
30+
pvar = name(parent)
2431
id = length(ls.operations)
2532
if pvar ∉ ls.syms_aliasing_refs
2633
push!(ls.syms_aliasing_refs, pvar)
2734
push!(ls.refs_aliasing_syms, mpref.mref)
2835
else
2936
# try to cse store, by replacing the previous one
30-
ref = mpref.mref.ref
37+
ref = mpref.mref.ref
3138
for opp ∈ operations(ls)
3239
isstore(opp) || continue
3340
if ref == opp.ref.ref
3441
id = opp.identifier
3542
break
43+
else
44+
@show ref opp.ref.ref
3645
end
3746
end
3847
end
3948
pushparent!(parents, ldref, reduceddeps, parent)
4049
op = Operation( id, name(mpref), elementbytes, :setindex!, memstore, mpref )#loopdependencies, reduceddeps, parents, mpref.mref )
4150
add_store!(ls, op)
4251
end
52+
4353
function add_store!(
4454
ls::LoopSet, var::Symbol, array::Symbol, rawindices, elementbytes::Int = 8
4555
)

src/graphs.jl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,6 @@ function save_tilecost!(ls::LoopSet)
192192
end
193193

194194

195-
# function op_to_ref(ls::LoopSet, op::Operation)
196-
# s = op.variable
197-
# id = findfirst(ls.syms_aliasing_regs)
198-
# @assert id !== nothing
199-
# ls.refs_aliasing_syms[id]
200-
# end
201195
function pushpreamble!(ls::LoopSet, op::Operation, v::Symbol)
202196
if v !== mangledvar(op)
203197
push!(ls.preamble_symsym, (identifier(op),v))
@@ -450,7 +444,10 @@ function add_operation!(
450444
ls::LoopSet, LHS_sym::Symbol, RHS::Expr, LHS_ref::ArrayReferenceMetaPosition, elementbytes::Int = 8
451445
)
452446
if RHS.head === :ref# || (RHS.head === :call && first(RHS.args) === :getindex)
453-
add_load!(ls, LHS_sym, LHS_ref, elementbytes)
447+
array, rawindices = ref_from_expr(RHS)
448+
RHS_ref = array_reference_meta!(ls, array, rawindices, elementbytes)
449+
op = add_load!(ls, gensym(LHS_sym), RHS_ref, elementbytes)
450+
add_compute!(ls, LHS_sym, :identity, [op], elementbytes)
454451
elseif RHS.head === :call
455452
f = first(RHS.args)
456453
if f === :getindex

test/runtests.jl

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1285,6 +1285,29 @@ end
12851285
lse, qq, xx, tmpmax, maxk, nk = softmax3_setup!(q, lse, tmpmax, x, maxk)
12861286
softmax3_core_avx2!(lse, qq, xx, tmpmax, maxk, nk)
12871287
end
1288+
1289+
function copyavx1!(x, y)
1290+
@avx for i ∈ eachindex(x)
1291+
x[i] = y[i]
1292+
end
1293+
end
1294+
function copy_avx1!(x, y)
1295+
@_avx for i ∈ eachindex(x)
1296+
x[i] = y[i]
1297+
end
1298+
end
1299+
function copyavx2!(x, y)
1300+
@avx for i ∈ eachindex(x)
1301+
yᵢ = y[i]
1302+
x[i] = yᵢ
1303+
end
1304+
end
1305+
function copy_avx2!(x, y)
1306+
@_avx for i ∈ eachindex(x)
1307+
yᵢ = y[i]
1308+
x[i] = yᵢ
1309+
end
1310+
end
12881311

12891312
for T ∈ (Float32, Float64)
12901313
@show T, @__LINE__
@@ -1366,7 +1389,15 @@ end
13661389
fill!(q2, 0); fill!(lse, 0); softmax3_avx2!(q2, lse, tmpmax, x);
13671390
@test q1 ≈ q2
13681391
@test sum(q2; dims=3) ≈ ones(T,ni,nj)
1369-
1392+
1393+
fill!(q2, NaN); copyavx1!(q2, x)
1394+
@test x == q2
1395+
fill!(q2, NaN); copy_avx1!(q2, x)
1396+
@test x == q2
1397+
fill!(q2, NaN); copyavx2!(q2, x)
1398+
@test x == q2
1399+
fill!(q2, NaN); copy_avx2!(q2, x)
1400+
@test x == q2
13701401
end
13711402
end
13721403

0 commit comments

Comments
 (0)