1
- function Loop (:: Type{UnitRange{Int}} )
2
- Loop (gensym (:n ), 0 , 1024 , gensym (:loopstart ), gensym (:loopstop ), false , false ):: Loop
1
+ function Loop (ls:: LoopSet , l:: Int , :: Type{UnitRange{Int}} )
2
+ start = gensym (:loopstart ); stop = gensym (:loopstop )
3
+ pushpreamble! (ls, Expr (:(= ), start, Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:(.), Expr (:ref , :lb , l), QuoteNode (:start )))))
4
+ pushpreamble! (ls, Expr (:(= ), stop, Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:(.), Expr (:ref , :lb , l), QuoteNode (:stop )))))
5
+ Loop (gensym (:n ), 0 , 1024 , start, stop, false , false ):: Loop
3
6
end
4
- function Loop (:: Type{StaticUpperUnitRange{U}} ) where {U}
5
- Loop (gensym (:n ), 0 , U, gensym (:loopstart ), Symbol (" " ), false , true ):: Loop
7
+ function Loop (ls:: LoopSet , l:: Int , :: Type{StaticUpperUnitRange{U}} ) where {U}
8
+ start = gensym (:loopstart )
9
+ pushpreamble! (ls, Expr (:(= ), start, Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:(.), Expr (:ref , :lb , l), QuoteNode (:L )))))
10
+ Loop (gensym (:n ), U - 1024 , U, start, Symbol (" " ), false , true ):: Loop
6
11
end
7
- function Loop (:: Type{StaticLowerUnitRange{L}} ) where {L}
8
- Loop (gensym (:n ), L, L + 1024 , Symbol (" " ), gensym (:loopstop ), true , false ):: Loop
12
+ function Loop (ls:: LoopSet , l:: Int , :: Type{StaticLowerUnitRange{L}} ) where {L}
13
+ stop = gensym (:loopstop )
14
+ pushpreamble! (ls, Expr (:(= ), stop, Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:(.), Expr (:ref , :lb , l), QuoteNode (:U )))))
15
+ Loop (gensym (:n ), L, L + 1024 , Symbol (" " ), stop, true , false ):: Loop
9
16
end
10
- function Loop (:: Type{StaticUnitRange{L,U}} ) where {L,U}
17
+ function Loop (ls, l, :: Type{StaticUnitRange{L,U}} ) where {L,U}
11
18
Loop (gensym (:n ), L, U, Symbol (" " ), Symbol (" " ), true , true ):: Loop
12
19
end
13
20
14
21
function add_loops! (ls:: LoopSet , LB)
15
22
loopsyms = [gensym (:n ) for _ ∈ eachindex (LB)]
16
23
for l ∈ LB
17
- add_loop! (ls, Loop (LB):: Loop )
24
+ add_loop! (ls, Loop (ls, l, LB):: Loop )
18
25
end
19
-
20
- end
21
- function add_ops! (ls:: LoopSet , ops:: Vector{OperationStruct} , start:: Int = 0 , stopvptr = nothing )
22
- num_ops = length (ops)
23
- while start < num_ops
24
- start += 1
25
- opdescript = ops[start]
26
-
27
- stopvptr === vptr (op) && return start
28
- end
29
- 0
30
26
end
31
- numinds (u:: UInt ) = 8 - (leading_zeros (u) >>> 3 )
32
- function add_mref! (ls:: LoopSet , ar:: ArrayRef , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} , :: Type{PackedStridedPointer{T, N}} ) where {T, N}
27
+ function ArrayReferenceMeta (
28
+ ls:: LoopSet , ar:: ArrayRefStruct , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} ,
29
+ array:: Symbol , vp:: Symbol
30
+ )
33
31
index_types = ar. index_types
34
32
indices = ar. indices
35
- ni = numinds (index_types)
36
- Ni = N + 1
37
- @assert ni == Ni
38
- index_vec = Vector {Symbol} (undef, Ni)
33
+ ni = filled_8byte_chunks (index_types)
34
+ index_vec = Vector {Symbol} (undef, ni)
35
+ loopedindex = fill (false , ni)
39
36
while index_types != zero (UInt64)
40
37
ind = indices % UInt8
41
38
symind = if index_types == LoopIndex
39
+ loopedindex[ni] = true
42
40
ls. loopsymbols[ind]
43
41
elseif index_types == ComputedIndex
44
42
opsymbols[ind]
@@ -51,15 +49,61 @@ function add_mref!(ls::LoopSet, ar::ArrayRef, arraysymbolinds::Vector{Symbol}, o
51
49
indices >>>= 8
52
50
ni -= 1
53
51
end
54
-
52
+ ArrayReferenceMeta (
53
+ ArrayReference (vp, index_vec),
54
+ loopedindex, array
55
+ )
55
56
end
56
57
57
- function add_mrefs! (ls:: LoopSet , arf:: Vector{ArrayRefStruct} , as:: Vector{Symbol} , os:: Vector{Symbol} , vargs)
58
+ function add_mref! (ls:: LoopSet , ars:: ArrayRefStruct , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} , i:: Int , :: Type{PackedStridedPointer{T, N}} ) where {T, N}
59
+ ar = ArrayReferenceMeta (ls, ar, arraysymbolinds, opsymbols, Symbol (" " ), gensym ())
60
+ pushpreamble! (ls, Expr (:(= ), vptr (ar), Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:ref , :vargs , i))))
61
+ ar
62
+ end
63
+ function add_mref! (ls:: LoopSet , ars:: ArrayRefStruct , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} , i:: Int , :: Type{RowMajorStridedPointer{T, N}} ) where {T, N}
64
+ ar = ArrayReferenceMeta (ls, ar, arraysymbolinds, opsymbols, Symbol (" " ), gensym ())
65
+ reverse! (ar. loopedindex); reverse! (getindices (ar)) # reverse the listed indices here, and transpose it to make it column major
66
+ pushpreamble! (ls, Expr (:(= ), vptr (ar), Expr (:call , lv (:Transpose ), Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:ref , :vargs , i)))))
67
+ ar
68
+ end
69
+ function add_mref! (ls:: LoopSet , ars:: ArrayRefStruct , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} , i:: Int , :: Type{StaticStridedPointer{T, X}} ) where {T, X <: Tuple{1,Vararg} }
70
+ ar = ArrayReferenceMeta (ls, ar, arraysymbolinds, opsymbols, Symbol (" " ), gensym ())
71
+ pushpreamble! (ls, Expr (:(= ), vptr (ar), Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:ref , :vargs , i))))
72
+ ar
73
+ end
74
+ function add_mref! (ls:: LoopSet , ars:: ArrayRefStruct , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} , i:: Int , :: Type{StaticStridedPointer{T, X}} ) where {T, X <: Tuple }
75
+ ar = ArrayReferenceMeta (ls, ar, arraysymbolinds, opsymbols, Symbol (" " ), gensym ())
76
+ pushpreamble! (ls, Expr (:(= ), vptr (ar), Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:ref , :vargs , i))))
77
+ pushfirst! (getindices (ar), Symbol (" ##DISCONTIGUOUSSUBARRAY##" ))
78
+ ar
79
+ end
80
+ function add_mref! (ls:: LoopSet , ars:: ArrayRefStruct , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} , i:: Int , :: Type{SparseStridedPointer{T, N}} ) where {T, N}
81
+ ar = ArrayReferenceMeta (ls, ar, arraysymbolinds, opsymbols, Symbol (" " ), gensym ())
82
+ pushpreamble! (ls, Expr (:(= ), vptr (ar), Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:ref , :vargs , i))))
83
+ pushfirst! (getindices (ar), Symbol (" ##DISCONTIGUOUSSUBARRAY##" ))
84
+ ar
85
+ end
86
+ function add_mref! (ls:: LoopSet , ars:: ArrayRefStruct , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} , i:: Int , :: Type{StaticStridedStruct{T, X}} ) where {T, X <: Tuple{1,Vararg} }
87
+ ar = ArrayReferenceMeta (ls, ar, arraysymbolinds, opsymbols, Symbol (" " ), gensym ())
88
+ pushpreamble! (ls, Expr (:(= ), vptr (ar), Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:ref , :vargs , i))))
89
+ ar
90
+ end
91
+ function add_mref! (ls:: LoopSet , ars:: ArrayRefStruct , arraysymbolinds:: Vector{Symbol} , opsymbols:: Vector{Symbol} , i:: Int , :: Type{StaticStridedStruct{T, X}} ) where {T, X <: Tuple }
92
+ ar = ArrayReferenceMeta (ls, ar, arraysymbolinds, opsymbols, Symbol (" " ), gensym ())
93
+ pushpreamble! (ls, Expr (:(= ), vptr (ar), Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ , @__FILE__ ), Expr (:ref , :vargs , i))))
94
+ pushfirst! (getindices (ar), Symbol (" ##DISCONTIGUOUSSUBARRAY##" ))
95
+ ar
96
+ end
97
+
98
+
99
+
100
+ function create_mrefs! (ls:: LoopSet , arf:: Vector{ArrayRefStruct} , as:: Vector{Symbol} , os:: Vector{Symbol} , vargs)
101
+ mrefs = Vector {ArrayReferenceMeta} (undef, length (arf))
58
102
for i ∈ eachindex (arf)
59
- ref = arf[i]
60
- ptr_type = vargs[i]
61
-
103
+ ref = add_mref! (ls, arf[i], as, os, i, vargs[i]):: ArrayReferenceMeta
104
+ mrefs[i] = ref
62
105
end
106
+ mrefs
63
107
end
64
108
function process_metadata! (ls:: LoopSet , AM, num_arrays:: Int )
65
109
num_asi = (AM[1 ]):: Int
@@ -77,14 +121,65 @@ function process_metadata!(ls::LoopSet, AM, num_arrays::Int)
77
121
append! (ls. preamble_ones, AM[7 ]. parameters)
78
122
arraysymbolinds
79
123
end
124
+ function parents_symvec (ls:: LoopSet , u:: Unsigned )
125
+ i = filled_4byte_chunks (u)
126
+ loops = Vector {Symbol} (undef, i)
127
+ while u != zero (u)
128
+ loops[i] = getloopsym (ls, ( u % UInt8 ) & 0x0f )
129
+ i -= 1
130
+ u >>= 4
131
+ end
132
+ loops
133
+ end
134
+ loopdependencies (ls:: LoopSet , os:: OperationStruct ) = parents_symvec (ls, op. loopdeps)
135
+ reduceddependencies (ls:: LoopSet , os:: OperationStruct ) = parents_symvec (ls, op. reduceddeps)
136
+
137
+
138
+
139
+ function add_op! (ls:: LoopSet , os:: OperationStruct , mrefs:: Vector{ArrayReferenceMeta} , opsymbol:: Symbol , elementbytes:: Int )
140
+ optype = os. node_type
141
+ op = Operation (
142
+ length (operations (ls)), opsymbol, elementbytes, os. instruction,
143
+ optype, loopdependencies (ls, os), reduceddependencies (ls, os),
144
+ Operation[], (isload (op) | isstore (op)) ? mrefs[os. array] : NOTAREFERENCE
145
+ )
146
+ push! (ls. operations, op)
147
+ op
148
+ end
149
+ function add_parents_to_op! (ls:: LoopSet , parents:: Vector{Operation} , up:: Unsigned )
150
+ ops = operations (ls)
151
+ while up != zero (up)
152
+ pushfirst! (parents, ops[ up % UInt8 ])
153
+ up >>>= 8
154
+ end
155
+ end
156
+ function add_parents_to_ops! (ls:: LoopSet , ops:: Vector{OperationStruct} )
157
+ for i ∈ eachindex (ops)
158
+ add_parents_to_op! (ls, parents (getop (ls, i)), ops[i]. parents)
159
+ end
160
+ end
161
+ function add_ops! (ls:: LoopSet , ops:: Vector{OperationStruct} , mrefs:: Vector{ArrayReferenceMeta} , opsymbols:: Vector{Symbol} , elementbytes:: Int )
162
+ for i ∈ eachindex (ops)
163
+ add_op! (ls, ops[i], mrefs, opsymbols[i], elementbytes)
164
+ end
165
+ add_parents_to_ops! (ls, ops)
166
+ end
167
+
168
+ # elbytes(::VectorizationBase.AbstractPointer{T}) where {T} = sizeof(T)::Int
169
+ typeeltype (:: Type{P} ) where {T,P<: VectorizationBase.AbstractPointer{T} } = T
170
+
80
171
function avx_body (ops, arf, AM, LB, vargs)
81
172
ls = LoopSet ()
173
+ # elementbytes = mapreduce(elbytes, min, @view(vargs[Base.OneTo(length(arf))]))::Int
174
+ elementbytes = sizeof (mapreduce (typeeltype,promote_type,@view (vargs[Base. OneTo (length (arf))]))):: Int
82
175
add_loops! (ls, LB)
83
176
arraysymbolinds = process_metadata! (ls, AM, length (arf))
84
177
opsymbols = [gensym (:op ) for _ ∈ eachindex (ops)]
85
-
178
+ mrefs = create_mrefs (ls, arf, arraysymbolinds, opsymbols, vargs)
179
+ add_ops! (ls, ops, mrefs, opsymbols, elementbytes)
86
180
end
87
181
182
+
88
183
@generated function _avx! (:: Type{OPS} , :: Type{ARF} , :: Type{AM} , lb:: LB , vargs... )
89
184
avx_body (
90
185
OperationStruct[OPS. parameters... ],
0 commit comments