957
957
softmax3_core_avx4! (lse, qq, xx, tmpmax, maxk, nk)
958
958
end
959
959
960
- function copyavx1! (x, y)
961
- @avx for i ∈ eachindex (x)
962
- x[i] = y[i]
963
- end
964
- end
965
- function copy_avx1! (x, y)
966
- @_avx for i ∈ eachindex (x)
967
- x[i] = y[i]
968
- end
969
- end
970
- function copyavx2! (x, y)
971
- @avx for i ∈ eachindex (x)
972
- yᵢ = y[i]
973
- x[i] = yᵢ
974
- end
975
- end
976
- function copy_avx2! (x, y)
977
- @_avx for i ∈ eachindex (x)
978
- yᵢ = y[i]
979
- x[i] = yᵢ
980
- end
981
- end
982
- function make2point3avx! (x)
983
- @avx for i ∈ eachindex (x)
984
- x[i] = 2.3
985
- end
986
- end
987
- function make2point3_avx! (x)
988
- @_avx for i ∈ eachindex (x)
989
- x[i] = 2.3
990
- end
991
- end
992
- function myfillavx! (x, a)
993
- @avx for i ∈ eachindex (x)
994
- x[i] = a
995
- end
996
- end
997
- function myfill_avx! (x, a)
998
- @_avx for i ∈ eachindex (x)
999
- x[i] = a
1000
- end
1001
- end
1002
-
1003
960
function mysumavx (x)
1004
961
s = zero (eltype (x))
1005
962
@avx for i ∈ eachindex (x)
@@ -1145,41 +1102,6 @@ end
1145
1102
@test q1 ≈ q2
1146
1103
@test sum (q2; dims= 3 ) ≈ ones (T,ni,nj)
1147
1104
1148
- fill! (q2, NaN ); copyavx1! (q2, x)
1149
- @test x == q2
1150
- fill! (q2, NaN ); copy_avx1! (q2, x)
1151
- @test x == q2
1152
- fill! (q2, NaN ); copyavx2! (q2, x)
1153
- @test x == q2
1154
- fill! (q2, NaN ); copy_avx2! (q2, x)
1155
- @test x == q2
1156
- fill! (q2, NaN ); @avx q2 .= x;
1157
- @test x == q2
1158
-
1159
- myfillavx! (x, - 9829732.153 );
1160
- fill! (q2, - 9829732.153 );
1161
- @test x == q2
1162
- myfill_avx! (x, 9732.153 );
1163
- fill! (q2, 9732.153 );
1164
- @test x == q2
1165
- myfill_avx! (x, 5 );
1166
- fill! (q2, 5 )
1167
- @test x == q2
1168
- myfillavx! (x, 5345 );
1169
- fill! (q2, 5345 )
1170
- @test x == q2
1171
- make2point3avx! (x)
1172
- fill! (q2, 2.3 )
1173
- @test x == q2
1174
- fill! (x, NaN ); make2point3_avx! (x)
1175
- @test x == q2
1176
- @avx x .= 34 ;
1177
- fill! (q2, 34 )
1178
- @test x == q2
1179
- @avx x .= 34.242 ;
1180
- fill! (q2, 34.242 )
1181
- @test x == q2
1182
-
1183
1105
s = sum (x)
1184
1106
@test s ≈ mysumavx (x)
1185
1107
@test s ≈ mysum_avx (x)
@@ -1199,6 +1121,153 @@ end
1199
1121
end
1200
1122
end
1201
1123
1124
+ @time @testset " copy" begin
1125
+
1126
+ function copyavx1! (x, y)
1127
+ @avx for i ∈ eachindex (x)
1128
+ x[i] = y[i]
1129
+ end
1130
+ end
1131
+ function copy_avx1! (x, y)
1132
+ @_avx for i ∈ eachindex (x)
1133
+ x[i] = y[i]
1134
+ end
1135
+ end
1136
+ function copyavx2! (x, y)
1137
+ @avx for i ∈ eachindex (x)
1138
+ yᵢ = y[i]
1139
+ x[i] = yᵢ
1140
+ end
1141
+ end
1142
+ function copy_avx2! (x, y)
1143
+ @_avx for i ∈ eachindex (x)
1144
+ yᵢ = y[i]
1145
+ x[i] = yᵢ
1146
+ end
1147
+ end
1148
+ function offset_copy! (A, B)
1149
+ @inbounds for i= 1 : size (A,1 ), j= 1 : size (B,2 )
1150
+ A[i,j+ 2 ] = B[i,j]
1151
+ end
1152
+ end
1153
+ function offset_copyavx1! (A, B)
1154
+ @avx for i= 1 : size (A,1 ), j= 1 : size (B,2 )
1155
+ A[i,j+ 2 ] = B[i,j]
1156
+ end
1157
+ end
1158
+ function offset_copy_avx1! (A, B)
1159
+ @_avx for i= 1 : size (A,1 ), j= 1 : size (B,2 )
1160
+ A[i,j+ 2 ] = B[i,j]
1161
+ end
1162
+ end
1163
+ function offset_copyavx2! (A, B)
1164
+ @avx for i= 1 : size (A,1 ), j= 1 : size (B,2 )
1165
+ Bᵢⱼ = B[i,j]
1166
+ A[i,j+ 2 ] = Bᵢⱼ
1167
+ end
1168
+ end
1169
+ function offset_copy_avx2! (A, B)
1170
+ @_avx for i= 1 : size (A,1 ), j= 1 : size (B,2 )
1171
+ Bᵢⱼ = B[i,j]
1172
+ A[i,j+ 2 ] = Bᵢⱼ
1173
+ end
1174
+ end
1175
+ function make2point3avx! (x)
1176
+ @avx for i ∈ eachindex (x)
1177
+ x[i] = 2.3
1178
+ end
1179
+ end
1180
+ function make2point3_avx! (x)
1181
+ @_avx for i ∈ eachindex (x)
1182
+ x[i] = 2.3
1183
+ end
1184
+ end
1185
+ function make23avx! (x)
1186
+ @avx for i ∈ eachindex (x)
1187
+ x[i] = 23
1188
+ end
1189
+ end
1190
+ function make23_avx! (x)
1191
+ @_avx for i ∈ eachindex (x)
1192
+ x[i] = 23
1193
+ end
1194
+ end
1195
+ function myfillavx! (x, a)
1196
+ @avx for i ∈ eachindex (x)
1197
+ x[i] = a
1198
+ end
1199
+ end
1200
+ function myfill_avx! (x, a)
1201
+ @_avx for i ∈ eachindex (x)
1202
+ x[i] = a
1203
+ end
1204
+ end
1205
+
1206
+ for T ∈ (Float32, Float64, Int32, Int64)
1207
+ @show T, @__LINE__
1208
+ R = T <: Integer ? (- T (100 ): T (100 )) : T
1209
+ x = rand (R, 237 );
1210
+ q1 = similar (x); q2 = similar (x);
1211
+
1212
+ fill! (q2, - 999999 ); copyavx1! (q2, x);
1213
+ @test x == q2
1214
+ fill! (q2, - 999999 ); copy_avx1! (q2, x);
1215
+ @test x == q2
1216
+ fill! (q2, - 999999 ); copyavx2! (q2, x);
1217
+ @test x == q2
1218
+ fill! (q2, - 999999 ); copy_avx2! (q2, x);
1219
+ @test x == q2
1220
+ fill! (q2, - 999999 ); @avx q2 .= x;
1221
+ @test x == q2
1222
+
1223
+ B = rand (R, 79 , 83 );
1224
+ A1 = zeros (T, 79 , 85 );
1225
+ A2 = zeros (T, 79 , 85 );
1226
+ offset_copy! (A1, B);
1227
+ fill! (A2, 0 ); offset_copyavx1! (A2, B);
1228
+ @test A1 == A2
1229
+ fill! (A2, 0 ); offset_copyavx2! (A2, B);
1230
+ @test A1 == A2
1231
+ fill! (A2, 0 ); offset_copy_avx1! (A2, B);
1232
+ @test A1 == A2
1233
+ fill! (A2, 0 ); offset_copy_avx2! (A2, B);
1234
+ @test A1 == A2
1235
+
1236
+ a = rand (R)
1237
+ myfillavx! (x, a);
1238
+ fill! (q2, a);
1239
+ @test x == q2
1240
+ a = rand (R)
1241
+ myfill_avx! (x, a);
1242
+ fill! (q2, a);
1243
+ @test x == q2
1244
+ a = rand (R)
1245
+ myfill_avx! (x, a);
1246
+ fill! (q2, a);
1247
+ @test x == q2
1248
+ a = rand (R)
1249
+ myfillavx! (x, a);
1250
+ fill! (q2, a);
1251
+ @test x == q2
1252
+ if T <: Union{Float32,Float64}
1253
+ make2point3avx! (x)
1254
+ fill! (q2, 2.3 )
1255
+ @test x == q2
1256
+ fill! (x, - 999999 ); make2point3_avx! (x)
1257
+ @test x == q2
1258
+ end
1259
+ a = rand (R)
1260
+ @avx x .= a;
1261
+ fill! (q2, a);
1262
+ @test x == q2
1263
+ a = rand (R)
1264
+ @avx x .= a;
1265
+ fill! (q2, a);
1266
+ @test x == q2
1267
+
1268
+ end
1269
+ end
1270
+
1202
1271
@time @testset " broadcast" begin
1203
1272
M, N = 37 , 47
1204
1273
# M = 77;
0 commit comments