Skip to content

Commit 150d337

Browse files
committed
Added missing sizeequivalent methods, and fixed bug in copying code. fixes #48.
1 parent 6d08983 commit 150d337

File tree

4 files changed

+153
-84
lines changed

4 files changed

+153
-84
lines changed

src/add_stores.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ function add_copystore!(
2121
ls::LoopSet, parent::Operation, mpref::ArrayReferenceMetaPosition, elementbytes::Int
2222
)
2323
op = add_compute!(ls, gensym(), :identity, [parent], elementbytes)
24+
pushfirst!(mpref.parents, parent)
2425
add_store!(ls, name(op), mpref, elementbytes, op)
2526
end
2627

@@ -42,8 +43,6 @@ function add_store!(
4243
if ref == opp.ref.ref
4344
id = opp.identifier
4445
break
45-
# else
46-
# @show ref opp.ref.ref
4746
end
4847
end
4948
add_pvar = false

src/graphs.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,8 @@ function add_operation!(
466466
array, rawindices = ref_from_expr(RHS)
467467
RHS_ref = array_reference_meta!(ls, array, rawindices, elementbytes)
468468
op = add_load!(ls, gensym(LHS_sym), RHS_ref, elementbytes)
469-
add_compute!(ls, LHS_sym, :identity, [op], elementbytes)
469+
iop = add_compute!(ls, LHS_sym, :identity, [op], elementbytes)
470+
pushfirst!(LHS_ref.parents, iop)
470471
elseif RHS.head === :call
471472
f = first(RHS.args)
472473
if f === :getindex

src/lowering.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -360,21 +360,21 @@ end
360360
@inline sizeequivalentfloat(::Type{Float16}, x::Float64) = Float16(x)
361361
@inline sizeequivalentfloat(::Type{Float16}, x::Float32) = Float16(x)
362362
@inline sizeequivalentint(::Type{T}, x::T) where {T} = x
363-
@inline sizeequivalentint(::Type{Int64}, x::Int64) = x
364363
@inline sizeequivalentint(::Type{Int64}, x::Int32) = Int64(x)
365364
@inline sizeequivalentint(::Type{Int64}, x::Int16) = Int64(x)
366365
@inline sizeequivalentint(::Type{Int32}, x::Int64) = Int32(x)
367-
@inline sizeequivalentint(::Type{Int32}, x::Int32) = x
368366
@inline sizeequivalentint(::Type{Int32}, x::Int16) = Int32(x)
369367
@inline sizeequivalentint(::Type{Int16}, x::Int64) = Int16(x)
370368
@inline sizeequivalentint(::Type{Int16}, x::Int32) = Int16(x)
371-
@inline sizeequivalentint(::Type{Int16}, x::Int16) = x
369+
@inline sizeequivalentint(::Type{Float64}, x::Int64) = x
372370
@inline sizeequivalentint(::Type{Float64}, x::Int32) = Int64(x)
373371
@inline sizeequivalentint(::Type{Float64}, x::Int16) = Int64(x)
374372
@inline sizeequivalentint(::Type{Float32}, x::Int64) = Int32(x)
373+
@inline sizeequivalentint(::Type{Float32}, x::Int32) = x
375374
@inline sizeequivalentint(::Type{Float32}, x::Int16) = Int32(x)
376375
@inline sizeequivalentint(::Type{Float16}, x::Int64) = Int16(x)
377376
@inline sizeequivalentint(::Type{Float16}, x::Int32) = Int16(x)
377+
@inline sizeequivalentint(::Type{Float16}, x::Int16) = x
378378

379379

380380
function setup_preamble!(ls::LoopSet, W::Symbol, typeT::Symbol, vectorized::Symbol, unrolled::Symbol, tiled::Symbol, U::Int)

test/runtests.jl

Lines changed: 147 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -957,49 +957,6 @@ end
957957
softmax3_core_avx4!(lse, qq, xx, tmpmax, maxk, nk)
958958
end
959959

960-
function copyavx1!(x, y)
961-
@avx for i ∈ eachindex(x)
962-
x[i] = y[i]
963-
end
964-
end
965-
function copy_avx1!(x, y)
966-
@_avx for i ∈ eachindex(x)
967-
x[i] = y[i]
968-
end
969-
end
970-
function copyavx2!(x, y)
971-
@avx for i ∈ eachindex(x)
972-
yᵢ = y[i]
973-
x[i] = yᵢ
974-
end
975-
end
976-
function copy_avx2!(x, y)
977-
@_avx for i ∈ eachindex(x)
978-
yᵢ = y[i]
979-
x[i] = yᵢ
980-
end
981-
end
982-
function make2point3avx!(x)
983-
@avx for i ∈ eachindex(x)
984-
x[i] = 2.3
985-
end
986-
end
987-
function make2point3_avx!(x)
988-
@_avx for i ∈ eachindex(x)
989-
x[i] = 2.3
990-
end
991-
end
992-
function myfillavx!(x, a)
993-
@avx for i ∈ eachindex(x)
994-
x[i] = a
995-
end
996-
end
997-
function myfill_avx!(x, a)
998-
@_avx for i ∈ eachindex(x)
999-
x[i] = a
1000-
end
1001-
end
1002-
1003960
function mysumavx(x)
1004961
s = zero(eltype(x))
1005962
@avx for i ∈ eachindex(x)
@@ -1145,41 +1102,6 @@ end
11451102
@test q1 ≈ q2
11461103
@test sum(q2; dims=3) ≈ ones(T,ni,nj)
11471104

1148-
fill!(q2, NaN); copyavx1!(q2, x)
1149-
@test x == q2
1150-
fill!(q2, NaN); copy_avx1!(q2, x)
1151-
@test x == q2
1152-
fill!(q2, NaN); copyavx2!(q2, x)
1153-
@test x == q2
1154-
fill!(q2, NaN); copy_avx2!(q2, x)
1155-
@test x == q2
1156-
fill!(q2, NaN); @avx q2 .= x;
1157-
@test x == q2
1158-
1159-
myfillavx!(x, -9829732.153);
1160-
fill!(q2, -9829732.153);
1161-
@test x == q2
1162-
myfill_avx!(x, 9732.153);
1163-
fill!(q2, 9732.153);
1164-
@test x == q2
1165-
myfill_avx!(x, 5);
1166-
fill!(q2, 5)
1167-
@test x == q2
1168-
myfillavx!(x, 5345);
1169-
fill!(q2, 5345)
1170-
@test x == q2
1171-
make2point3avx!(x)
1172-
fill!(q2, 2.3)
1173-
@test x == q2
1174-
fill!(x, NaN); make2point3_avx!(x)
1175-
@test x == q2
1176-
@avx x .= 34;
1177-
fill!(q2, 34)
1178-
@test x == q2
1179-
@avx x .= 34.242;
1180-
fill!(q2, 34.242)
1181-
@test x == q2
1182-
11831105
s = sum(x)
11841106
@test s ≈ mysumavx(x)
11851107
@test s ≈ mysum_avx(x)
@@ -1199,6 +1121,153 @@ end
11991121
end
12001122
end
12011123

1124+
@time @testset "copy" begin
1125+
1126+
function copyavx1!(x, y)
1127+
@avx for i ∈ eachindex(x)
1128+
x[i] = y[i]
1129+
end
1130+
end
1131+
function copy_avx1!(x, y)
1132+
@_avx for i ∈ eachindex(x)
1133+
x[i] = y[i]
1134+
end
1135+
end
1136+
function copyavx2!(x, y)
1137+
@avx for i ∈ eachindex(x)
1138+
yᵢ = y[i]
1139+
x[i] = yᵢ
1140+
end
1141+
end
1142+
function copy_avx2!(x, y)
1143+
@_avx for i ∈ eachindex(x)
1144+
yᵢ = y[i]
1145+
x[i] = yᵢ
1146+
end
1147+
end
1148+
function offset_copy!(A, B)
1149+
@inbounds for i=1:size(A,1), j=1:size(B,2)
1150+
A[i,j+2] = B[i,j]
1151+
end
1152+
end
1153+
function offset_copyavx1!(A, B)
1154+
@avx for i=1:size(A,1), j=1:size(B,2)
1155+
A[i,j+2] = B[i,j]
1156+
end
1157+
end
1158+
function offset_copy_avx1!(A, B)
1159+
@_avx for i=1:size(A,1), j=1:size(B,2)
1160+
A[i,j+2] = B[i,j]
1161+
end
1162+
end
1163+
function offset_copyavx2!(A, B)
1164+
@avx for i=1:size(A,1), j=1:size(B,2)
1165+
Bᵢⱼ = B[i,j]
1166+
A[i,j+2] = Bᵢⱼ
1167+
end
1168+
end
1169+
function offset_copy_avx2!(A, B)
1170+
@_avx for i=1:size(A,1), j=1:size(B,2)
1171+
Bᵢⱼ = B[i,j]
1172+
A[i,j+2] = Bᵢⱼ
1173+
end
1174+
end
1175+
function make2point3avx!(x)
1176+
@avx for i ∈ eachindex(x)
1177+
x[i] = 2.3
1178+
end
1179+
end
1180+
function make2point3_avx!(x)
1181+
@_avx for i ∈ eachindex(x)
1182+
x[i] = 2.3
1183+
end
1184+
end
1185+
function make23avx!(x)
1186+
@avx for i ∈ eachindex(x)
1187+
x[i] = 23
1188+
end
1189+
end
1190+
function make23_avx!(x)
1191+
@_avx for i ∈ eachindex(x)
1192+
x[i] = 23
1193+
end
1194+
end
1195+
function myfillavx!(x, a)
1196+
@avx for i ∈ eachindex(x)
1197+
x[i] = a
1198+
end
1199+
end
1200+
function myfill_avx!(x, a)
1201+
@_avx for i ∈ eachindex(x)
1202+
x[i] = a
1203+
end
1204+
end
1205+
1206+
for T ∈ (Float32, Float64, Int32, Int64)
1207+
@show T, @__LINE__
1208+
R = T <: Integer ? (-T(100):T(100)) : T
1209+
x = rand(R, 237);
1210+
q1 = similar(x); q2 = similar(x);
1211+
1212+
fill!(q2, -999999); copyavx1!(q2, x);
1213+
@test x == q2
1214+
fill!(q2, -999999); copy_avx1!(q2, x);
1215+
@test x == q2
1216+
fill!(q2, -999999); copyavx2!(q2, x);
1217+
@test x == q2
1218+
fill!(q2, -999999); copy_avx2!(q2, x);
1219+
@test x == q2
1220+
fill!(q2, -999999); @avx q2 .= x;
1221+
@test x == q2
1222+
1223+
B = rand(R, 79, 83);
1224+
A1 = zeros(T, 79, 85);
1225+
A2 = zeros(T, 79, 85);
1226+
offset_copy!(A1, B);
1227+
fill!(A2, 0); offset_copyavx1!(A2, B);
1228+
@test A1 == A2
1229+
fill!(A2, 0); offset_copyavx2!(A2, B);
1230+
@test A1 == A2
1231+
fill!(A2, 0); offset_copy_avx1!(A2, B);
1232+
@test A1 == A2
1233+
fill!(A2, 0); offset_copy_avx2!(A2, B);
1234+
@test A1 == A2
1235+
1236+
a = rand(R)
1237+
myfillavx!(x, a);
1238+
fill!(q2, a);
1239+
@test x == q2
1240+
a = rand(R)
1241+
myfill_avx!(x, a);
1242+
fill!(q2, a);
1243+
@test x == q2
1244+
a = rand(R)
1245+
myfill_avx!(x, a);
1246+
fill!(q2, a);
1247+
@test x == q2
1248+
a = rand(R)
1249+
myfillavx!(x, a);
1250+
fill!(q2, a);
1251+
@test x == q2
1252+
if T <: Union{Float32,Float64}
1253+
make2point3avx!(x)
1254+
fill!(q2, 2.3)
1255+
@test x == q2
1256+
fill!(x, -999999); make2point3_avx!(x)
1257+
@test x == q2
1258+
end
1259+
a = rand(R)
1260+
@avx x .= a;
1261+
fill!(q2, a);
1262+
@test x == q2
1263+
a = rand(R)
1264+
@avx x .= a;
1265+
fill!(q2, a);
1266+
@test x == q2
1267+
1268+
end
1269+
end
1270+
12021271
@time @testset "broadcast" begin
12031272
M, N = 37, 47
12041273
# M = 77;

0 commit comments

Comments
 (0)