@@ -84,7 +84,15 @@ const COST = Dict{Instruction,InstructionCost}(
84
84
Instruction (:vadd ) => InstructionCost (4 ,0.5 ),
85
85
Instruction (:vsub ) => InstructionCost (4 ,0.5 ),
86
86
Instruction (:vmul ) => InstructionCost (4 ,0.5 ),
87
- Instruction (:vdiv ) => InstructionCost (13 ,4.0 ,- 2.0 ),
87
+ Instruction (:vfdiv ) => InstructionCost (13 ,4.0 ,- 2.0 ),
88
+ Instruction (:evadd ) => InstructionCost (4 ,0.5 ),
89
+ Instruction (:evsub ) => InstructionCost (4 ,0.5 ),
90
+ Instruction (:evmul ) => InstructionCost (4 ,0.5 ),
91
+ Instruction (:evfdiv ) => InstructionCost (13 ,4.0 ,- 2.0 ),
92
+ Instruction (:reduced_add ) => InstructionCost (4 ,0.5 ),# ignoring reduction part of cost, might be nop
93
+ Instruction (:reduced_prod ) => InstructionCost (4 ,0.5 ),# ignoring reduction part of cost, might be nop
94
+ Instruction (:reduce_to_add ) => InstructionCost (0 ,0.0 ,0.0 ,0 ),
95
+ Instruction (:reduce_to_prod ) => InstructionCost (0 ,0.0 ,0.0 ,0 ),
88
96
Instruction (:abs2 ) => InstructionCost (4 ,0.5 ),
89
97
Instruction (:vabs2 ) => InstructionCost (4 ,0.5 ),
90
98
Instruction (:(== )) => InstructionCost (1 , 0.5 ),
@@ -110,14 +118,20 @@ const COST = Dict{Instruction,InstructionCost}(
110
118
Instruction (:vfnmadd_fast ) => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
111
119
Instruction (:vfnmsub_fast ) => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
112
120
Instruction (:sqrt ) => InstructionCost (15 ,4.0 ,- 2.0 ),
121
+ Instruction (:sqrt_fast ) => InstructionCost (15 ,4.0 ,- 2.0 ),
113
122
Instruction (:log ) => InstructionCost (20 ,20.0 ,40.0 ,20 ),
114
123
Instruction (:exp ) => InstructionCost (20 ,20.0 ,20.0 ,18 ),
115
124
Instruction (:sin ) => InstructionCost (18 ,15.0 ,68.0 ,23 ),
116
125
Instruction (:cos ) => InstructionCost (18 ,15.0 ,68.0 ,26 ),
117
126
Instruction (:sincos ) => InstructionCost (25 ,22.0 ,70.0 ,26 ),
127
+ Instruction (:log_fast ) => InstructionCost (20 ,20.0 ,40.0 ,20 ),
128
+ Instruction (:exp_fast ) => InstructionCost (20 ,20.0 ,20.0 ,18 ),
129
+ Instruction (:sin_fast ) => InstructionCost (18 ,15.0 ,68.0 ,23 ),
130
+ Instruction (:cos_fast ) => InstructionCost (18 ,15.0 ,68.0 ,26 ),
131
+ Instruction (:sincos_fast ) => InstructionCost (25 ,22.0 ,70.0 ,26 ),
118
132
Instruction (:identity ) => InstructionCost (0 ,0.0 ,0.0 ,0 ),
119
133
Instruction (:adjoint ) => InstructionCost (0 ,0.0 ,0.0 ,0 ),
120
- Instruction (:transpose ) => InstructionCost (0 ,0.0 ,0.0 ,0 ),
134
+ Instruction (:transpose ) => InstructionCost (0 ,0.0 ,0.0 ,0 )
121
135
# Symbol("##CONSTANT##") => InstructionCost(0,0.0)
122
136
)
123
137
# for (k, v) ∈ COST # so we can look up Symbol(typeof(function))
@@ -131,6 +145,9 @@ const CORRESPONDING_REDUCTION = Dict{Instruction,Instruction}(
131
145
Instruction (:vadd ) => Instruction (:vsum ),
132
146
Instruction (:vsub ) => Instruction (:vsum ),
133
147
Instruction (:vmul ) => Instruction (:vprod ),
148
+ Instruction (:evadd ) => Instruction (:vsum ),
149
+ Instruction (:evsub ) => Instruction (:vsum ),
150
+ Instruction (:evmul ) => Instruction (:vprod ),
134
151
Instruction (:& ) => Instruction (:vall ),
135
152
Instruction (:| ) => Instruction (:vany ),
136
153
Instruction (:muladd ) => Instruction (:vsum ),
@@ -140,7 +157,11 @@ const CORRESPONDING_REDUCTION = Dict{Instruction,Instruction}(
140
157
Instruction (:vfmadd ) => Instruction (:vsum ),
141
158
Instruction (:vfmsub ) => Instruction (:vsum ),
142
159
Instruction (:vfnmadd ) => Instruction (:vsum ),
143
- Instruction (:vfnmsub ) => Instruction (:vsum )
160
+ Instruction (:vfnmsub ) => Instruction (:vsum ),
161
+ Instruction (:vfmadd_fast ) => Instruction (:vsum ),
162
+ Instruction (:vfmsub_fast ) => Instruction (:vsum ),
163
+ Instruction (:vfnmadd_fast ) => Instruction (:vsum ),
164
+ Instruction (:vfnmsub_fast ) => Instruction (:vsum )
144
165
)
145
166
const REDUCTION_TRANSLATION = Dict {Instruction,Instruction} (
146
167
Instruction (:+ ) => Instruction (:evadd ),
@@ -158,25 +179,37 @@ const REDUCTION_TRANSLATION = Dict{Instruction,Instruction}(
158
179
Instruction (:vfmadd ) => Instruction (:evadd ),
159
180
Instruction (:vfmsub ) => Instruction (:evadd ),
160
181
Instruction (:vfnmadd ) => Instruction (:evadd ),
161
- Instruction (:vfnmsub ) => Instruction (:evadd )
182
+ Instruction (:vfnmsub ) => Instruction (:evadd ),
183
+ Instruction (:vfmadd_fast ) => Instruction (:evadd ),
184
+ Instruction (:vfmsub_fast ) => Instruction (:evadd ),
185
+ Instruction (:vfnmadd_fast ) => Instruction (:evadd ),
186
+ Instruction (:vfnmsub_fast ) => Instruction (:evadd )
162
187
)
163
188
# REDUCTION_ZERO: lookup table from an `Instruction` to one of the symbols `:zero` or `:one`.
# Addition, subtraction and the fused multiply-add family (including the `_fast`
# variants) map to `:zero`; multiplication and division map to `:one`.
# NOTE(review): presumably the symbol names the function that produces the
# starting value of a reduction accumulator for that instruction; confirm at the use site.
const REDUCTION_ZERO = Dict {Instruction,Symbol} (
164
189
Instruction (:+ ) => :zero ,
165
190
Instruction (:vadd ) => :zero ,
191
+ Instruction (:evadd ) => :zero ,
166
192
Instruction (:* ) => :one ,
167
193
Instruction (:vmul ) => :one ,
194
+ Instruction (:evmul ) => :one ,
168
195
Instruction (:- ) => :zero ,
169
196
Instruction (:vsub ) => :zero ,
197
+ Instruction (:evsub ) => :zero ,
170
198
Instruction (:/ ) => :one ,
171
199
Instruction (:vfdiv ) => :one ,
200
+ Instruction (:evfdiv ) => :one ,
172
201
Instruction (:muladd ) => :zero ,
173
202
Instruction (:fma ) => :zero ,
174
203
Instruction (:vmuladd ) => :zero ,
175
204
Instruction (:vfma ) => :zero ,
176
205
Instruction (:vfmadd ) => :zero ,
177
206
Instruction (:vfmsub ) => :zero ,
178
207
Instruction (:vfnmadd ) => :zero ,
179
- Instruction (:vfnmsub ) => :zero
208
+ Instruction (:vfnmsub ) => :zero ,
209
+ Instruction (:vfmadd_fast ) => :zero ,
210
+ Instruction (:vfmsub_fast ) => :zero ,
211
+ Instruction (:vfnmadd_fast ) => :zero ,
212
+ Instruction (:vfnmsub_fast ) => :zero
180
213
)
181
214
182
215
# lv(x): shorthand that wraps the name `x` in a `GlobalRef` bound to the
# `LoopVectorization` module.
lv (x) = GlobalRef (LoopVectorization, x)
@@ -197,7 +230,15 @@ const REDUCTION_SCALAR_COMBINE = Dict{Instruction,GlobalRef}(
197
230
Instruction (:vfmadd ) => lv (:reduced_add ),
198
231
Instruction (:vfmsub ) => lv (:reduced_add ),
199
232
Instruction (:vfnmadd ) => lv (:reduced_add ),
200
- Instruction (:vfnmsub ) => lv (:reduced_add )
233
+ Instruction (:vfnmsub ) => lv (:reduced_add ),
234
+ Instruction (:vfmadd_fast ) => lv (:reduced_add ),
235
+ Instruction (:vfmsub_fast ) => lv (:reduced_add ),
236
+ Instruction (:vfnmadd_fast ) => lv (:reduced_add ),
237
+ Instruction (:vfnmsub_fast ) => lv (:reduced_add )
238
+ )
239
# REDUCTION_COMBINETO: Symbol-to-Symbol table pairing `:reduced_add` with
# `:reduce_to_add` and `:reduced_prod` with `:reduce_to_prod`.
# NOTE(review): presumably used to select the `reduce_to_*` counterpart of a
# `reduced_*` combine function; confirm against callers.
+ const REDUCTION_COMBINETO = Dict {Symbol,Symbol} (
240
+ :reduced_add => :reduce_to_add ,
241
+ :reduced_prod => :reduce_to_prod
201
242
)
202
243
203
244
const FUNCTIONSYMBOLS = Dict {Type{<:Function},Instruction} (
@@ -230,6 +271,10 @@ const FUNCTIONSYMBOLS = Dict{Type{<:Function},Instruction}(
230
271
typeof (SIMDPirates. vfmsub) => :vfmsub ,
231
272
typeof (SIMDPirates. vfnmadd) => :vfnmadd ,
232
273
typeof (SIMDPirates. vfnmsub) => :vfnmsub ,
274
+ typeof (SIMDPirates. vfmadd_fast) => :vfmadd_fast ,
275
+ typeof (SIMDPirates. vfmsub_fast) => :vfmsub_fast ,
276
+ typeof (SIMDPirates. vfnmadd_fast) => :vfnmadd_fast ,
277
+ typeof (SIMDPirates. vfnmsub_fast) => :vfnmsub_fast ,
233
278
typeof (sqrt) => :sqrt ,
234
279
typeof (Base. FastMath. sqrt_fast) => :sqrt ,
235
280
typeof (SIMDPirates. vsqrt) => :sqrt ,
0 commit comments