@@ -108,6 +108,8 @@ const COST = Dict{Instruction,InstructionCost}(
108
108
Instruction (:(/ )) => InstructionCost (13 ,4.0 ,- 2.0 ),
109
109
Instruction (:vadd ) => InstructionCost (4 ,0.5 ),
110
110
Instruction (:vsub ) => InstructionCost (4 ,0.5 ),
111
+ Instruction (:vadd! ) => InstructionCost (4 ,0.5 ),
112
+ Instruction (:vsub! ) => InstructionCost (4 ,0.5 ),
111
113
Instruction (:vmul ) => InstructionCost (4 ,0.5 ),
112
114
Instruction (:vfdiv ) => InstructionCost (13 ,4.0 ,- 2.0 ),
113
115
Instruction (:evadd ) => InstructionCost (4 ,0.5 ),
@@ -148,10 +150,21 @@ const COST = Dict{Instruction,InstructionCost}(
148
150
Instruction (:vfmsub ) => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
149
151
Instruction (:vfnmadd ) => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
150
152
Instruction (:vfnmsub ) => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
153
+ Instruction (:vfmadd! ) => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
154
+ Instruction (:vfnmadd! ) => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
151
155
Instruction (:vfmadd_fast ) => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
152
156
Instruction (:vfmsub_fast ) => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
153
157
Instruction (:vfnmadd_fast ) => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
154
158
Instruction (:vfnmsub_fast ) => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
159
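+ Instruction (:vfmaddaddone ) => InstructionCost (4 ,0.5 ), # NOTE(review): previous comment ("- and -*") was copied from vfnmsub_fast; by the vfmadd naming pattern above this is presumably a + and * fusion — confirm. Costed like the other fused ops, so much of the time they're not twice as expensive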
160
+ Instruction (:vmullog2 ) => InstructionCost (4 ,0.5 ),
161
+ Instruction (:vmullog2add! ) => InstructionCost (4 ,0.5 ),
162
+ Instruction (:vmullog10 ) => InstructionCost (4 ,0.5 ),
163
+ Instruction (:vmullog10add! ) => InstructionCost (4 ,0.5 ),
164
+ Instruction (:vdivlog2 ) => InstructionCost (13 ,4.0 ,- 2.0 ),
165
+ Instruction (:vdivlog2add! ) => InstructionCost (13 ,4.0 ,- 2.0 ),
166
+ Instruction (:vdivlog10 ) => InstructionCost (13 ,4.0 ,- 2.0 ),
167
+ Instruction (:vdivlog10add! ) => InstructionCost (13 ,4.0 ,- 2.0 ),
155
168
Instruction (:sqrt ) => InstructionCost (15 ,4.0 ,- 2.0 ),
156
169
Instruction (:sqrt_fast ) => InstructionCost (15 ,4.0 ,- 2.0 ),
157
170
Instruction (:log ) => InstructionCost (20 ,20.0 ,40.0 ,20 ),
@@ -213,6 +226,8 @@ const REDUCTION_CLASS = Dict{Symbol,Float64}(
213
226
:vfmsub => ADDITIVE_IN_REDUCTIONS,
214
227
:vfnmadd => ADDITIVE_IN_REDUCTIONS,
215
228
:vfnmsub => ADDITIVE_IN_REDUCTIONS,
229
+ :vfmadd! => ADDITIVE_IN_REDUCTIONS,
230
+ :vfnmadd! => ADDITIVE_IN_REDUCTIONS,
216
231
:vfmadd_fast => ADDITIVE_IN_REDUCTIONS,
217
232
:vfmsub_fast => ADDITIVE_IN_REDUCTIONS,
218
233
:vfnmadd_fast => ADDITIVE_IN_REDUCTIONS,
@@ -260,9 +275,11 @@ isreductcombineinstr(instr::Instruction) = isreductcombineinstr(instr.instr)
260
275
const FUNCTIONSYMBOLS = Dict {Type{<:Function},Instruction} (
261
276
typeof (+ ) => :(+ ),
262
277
typeof (SIMDPirates. vadd) => :(+ ),
278
+ typeof (SIMDPirates. vadd!) => :(+ ),
263
279
typeof (Base. FastMath. add_fast) => :(+ ),
264
280
typeof (- ) => :(- ),
265
281
typeof (SIMDPirates. vsub) => :(- ),
282
+ typeof (SIMDPirates. vsub!) => :(- ),
266
283
typeof (Base. FastMath. sub_fast) => :(- ),
267
284
typeof (* ) => :(* ),
268
285
typeof (SIMDPirates. vmul) => :(* ),
@@ -287,10 +304,21 @@ const FUNCTIONSYMBOLS = Dict{Type{<:Function},Instruction}(
287
304
typeof (SIMDPirates. vfmsub) => :vfmsub ,
288
305
typeof (SIMDPirates. vfnmadd) => :vfnmadd ,
289
306
typeof (SIMDPirates. vfnmsub) => :vfnmsub ,
307
+ typeof (SIMDPirates. vfmadd!) => :vfmadd! ,
308
+ typeof (SIMDPirates. vfnmadd!) => :vfnmadd! ,
290
309
typeof (SIMDPirates. vfmadd_fast) => :vfmadd_fast ,
291
310
typeof (SIMDPirates. vfmsub_fast) => :vfmsub_fast ,
292
311
typeof (SIMDPirates. vfnmadd_fast) => :vfnmadd_fast ,
293
312
typeof (SIMDPirates. vfnmsub_fast) => :vfnmsub_fast ,
313
+ typeof (vfmaddaddone) => :vfmaddaddone ,
314
+ typeof (vmullog2) => :vmullog2 ,
315
+ typeof (vmullog2add!) => :vmullog2add! ,
316
+ typeof (vmullog10) => :vmullog10 ,
317
+ typeof (vmullog10add!) => :vmullog10add! ,
318
+ typeof (vdivlog2) => :vdivlog2 ,
319
+ typeof (vdivlog2add!) => :vdivlog2add! ,
320
+ typeof (vdivlog10) => :vdivlog10 ,
321
+ typeof (vdivlog10add!) => :vdivlog10add! ,
294
322
typeof (sqrt) => :sqrt ,
295
323
typeof (Base. FastMath. sqrt_fast) => :sqrt ,
296
324
typeof (SIMDPirates. vsqrt) => :sqrt ,
0 commit comments