Skip to content

Commit a0df499

Browse files
committed
Support transposed offset arrays.
1 parent ea2d6f0 commit a0df499

File tree

3 files changed

+49
-24
lines changed

3 files changed

+49
-24
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.7.2"
4+
version = "0.7.3"
55

66
[deps]
77
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"

src/vectorizationbase_extensions.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,21 @@ end
1515
# Build the strided pointer used for an `OffsetArrays.OffsetArray`.
# The result wraps the strided pointer of the underlying parent array together with
# the array's `offsets` after they have been passed through `VectorizationBase.staticm1`.
# NOTE(review): `staticm1` presumably subtracts one from each offset — confirm against
# VectorizationBase.
@inline function VectorizationBase.stridedpointer(A::OffsetArrays.OffsetArray)
1616
OffsetStridedPointer(stridedpointer(parent(A)), VectorizationBase.staticm1(A.offsets))
1717
end
18+
19+
# Strided pointer for the adjoint (`'`) of an `OffsetArrays.OffsetArray`.
# `parent(B)` unwraps the `Adjoint` to recover the offset array `Boff`; the pointer is then
# built from the adjoint of `Boff`'s own parent array, paired with `Boff.offsets` passed
# through `VectorizationBase.staticm1`.
# NOTE(review): the offsets are taken in `Boff`'s dimension order while the wrapped pointer
# is for the adjoint (dimensions swapped). If `OffsetStridedPointer` applies offsets
# per dimension, they may need to be reversed when the two offsets differ — confirm.
@inline function VectorizationBase.stridedpointer(
20+
B::Adjoint{T,A}
21+
) where {T,A<:OffsetArrays.OffsetArray{T}}
22+
Boff = parent(B)
23+
OffsetStridedPointer(
24+
stridedpointer(parent(Boff)'),
25+
VectorizationBase.staticm1(Boff.offsets)
26+
)
27+
end
28+
# Transpose an `OffsetStridedPointer`: the wrapped pointer `A.ptr` is transposed and
# re-wrapped together with the original `A.offsets`, which are carried over unchanged.
# NOTE(review): the offsets are not permuted to match the transposed pointer; verify this
# is intended when the per-dimension offsets are not all equal.
@inline function Base.transpose(A::OffsetStridedPointer)
29+
OffsetStridedPointer(
30+
transpose(A.ptr), A.offsets
31+
)
32+
end
1833
# Tuple of length == 1, use ind directly.
1934
# @inline VectorizationBase.offset(ptr::OffsetStridedPointer, ind::Tuple{I}) where {I} = VectorizationBase.offset(ptr.ptr, ind)
2035
# Tuple of length > 1, subtract offsets.

test/offsetarrays.jl

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,15 @@ T = Float64
4848
# end);
4949
# lsq2d = LoopVectorization.LoopSet(q2d); LoopVectorization.choose_order(lsq2d)
5050

51-
oq2 = :(for j in rng2, i in rng1
52-
tmp = zero(eltype(out))
53-
for jk in -1:1, ik in -1:1
54-
tmp += A[i+ik,j+jk]*kern[ik,jk]
55-
end
56-
out[i,j] = tmp
57-
end);
58-
lsoq = LoopVectorization.LoopSet(oq2);
59-
LoopVectorization.choose_order(lsoq)
51+
# oq2 = :(for j in rng2, i in rng1
52+
# tmp = zero(eltype(out))
53+
# for jk in -1:1, ik in -1:1
54+
# tmp += A[i+ik,j+jk]*kern[ik,jk]
55+
# end
56+
# out[i,j] = tmp
57+
# end);
58+
# lsoq = LoopVectorization.LoopSet(oq2);
59+
# LoopVectorization.choose_order(lsoq)
6060

6161
function avx2d!(out::AbstractMatrix, A::AbstractMatrix, kern)
6262
rng1k, rng2k = axes(kern)
@@ -141,19 +141,20 @@ T = Float64
141141
# lsuq = LoopVectorization.LoopSet(macroexpand(Base, uq));
142142
# LoopVectorization.choose_order(lsuq)
143143

144-
# out = out1;
145-
# z = zero(eltype(out));
146-
# R=CartesianIndices(out);
147-
# Rk = CartesianIndices(kern);
148-
# lsgeneric = LoopVectorization.@avx_debug for I in R
149-
# tmp = z
150-
# for J in Rk
151-
# tmp += A[I+J]*kern[J]
152-
# end
153-
# out[I] = tmp
154-
# end;
155-
# LoopVectorization.choose_order(lsgeneric)
156-
# out = out1;
144+
# using LoopVectorization, OffsetArrays
145+
# T = Float64
146+
# A = rand(T, 100, 100);
147+
# kern = OffsetArray(rand(T, 3, 3), -1:1, -1:1);
148+
# out = OffsetArray(similar(A, size(A).-2), 1, 1); # stay away from the edges of A
149+
# lsgeneric = LoopVectorization.@avx_debug for I in CartesianIndices(out)
150+
# tmp = zero(eltype(out))
151+
# for J in CartesianIndices(kern)
152+
# tmp += A[I+J]*kern[J]
153+
# end
154+
# out[I] = tmp
155+
# end;
156+
# LoopVectorization.choose_order(lsgeneric)
157+
# # out = out1;
157158
# lsgenerics = LoopVectorization.@avx_debug for I in CartesianIndices(out)
158159
# tmp = zero(eltype(out))
159160
# for J in CartesianIndices(skern)
@@ -194,7 +195,7 @@ T = Float64
194195

195196
for T ∈ (Float32, Float64)
196197
@show T, @__LINE__
197-
A = rand(T, 100, 100);
198+
A = rand(T, 100, 100); At = copy(A');
198199
kern = OffsetArray(rand(T, 3, 3), -1:1, -1:1);
199200
skern = SizedOffsetMatrix{T,-1,1,-1,1}(parent(kern));
200201
out1 = OffsetArray(similar(A, size(A).-2), 1, 1); # stay away from the edges of A
@@ -210,6 +211,15 @@ T = Float64
210211
fill!(out2, NaN); avx2d!(out2, A, skern);
211212
@test out1 ≈ out2
212213

214+
fill!(out2, NaN); avx2d!(out2, At', skern);
215+
@test out1 ≈ out2
216+
217+
fill!(out2, NaN); avx2d!(out2', A, skern);
218+
@test out1 ≈ out2'
219+
220+
fill!(out2, NaN); avx2d!(out2', At', skern);
221+
@test out1 ≈ out2'
222+
213223
fill!(out3, NaN); avx2douter!(out3, A, skern);
214224
@test out1 ≈ out3
215225

0 commit comments

Comments
 (0)