@@ -137,7 +137,9 @@ const COST = Dict{Symbol,InstructionCost}(
137
137
:mul_fast => InstructionCost (4 ,0.5 ),
138
138
# :vfdiv => InstructionCost(13,4.0,-2.0),
139
139
# :vfdiv! => InstructionCost(13,4.0,-2.0),
140
+ :rem_fast => InstructionCost (13 ,4.0 ,- 2.0 ), # FIXME
140
141
:div_fast => InstructionCost (13 ,4.0 ,- 2.0 ),
142
+ :vdiv_fast => InstructionCost (20 ,4.0 ,- 2.0 ), # FIXME
141
143
# :evadd => InstructionCost(4,0.5),
142
144
# :evsub => InstructionCost(4,0.5),
143
145
# :evmul => InstructionCost(4,0.5),
@@ -152,7 +154,7 @@ const COST = Dict{Symbol,InstructionCost}(
152
154
:reduce_to_prod => InstructionCost (0 ,0.0 ,0.0 ,0 ),
153
155
:abs => InstructionCost (1 , 0.5 ),
154
156
:abs2 => InstructionCost (4 ,0.5 ),
155
- # :vabs2 => InstructionCost(4,0.5),
157
+ :abs2_fast => InstructionCost (4 ,0.5 ),
156
158
:(== ) => InstructionCost (1 , 0.5 ),
157
159
:(!= ) => InstructionCost (1 , 0.5 ),
158
160
:(isnan) => InstructionCost (1 , 0.5 ),
@@ -179,19 +181,26 @@ const COST = Dict{Symbol,InstructionCost}(
179
181
:iseven => InstructionCost (1 , 0.5 ),
180
182
:max => InstructionCost (4 ,0.5 ),
181
183
:min => InstructionCost (4 ,0.5 ),
184
+ :max_fast => InstructionCost (4 ,0.5 ),
185
+ :min_fast => InstructionCost (4 ,0.5 ),
182
186
:relu => InstructionCost (4 ,0.5 ),
183
187
# Instruction(:ifelse) => InstructionCost(1, 0.5),
184
188
:ifelse => InstructionCost (1 , 0.5 ),
185
189
:inv => InstructionCost (13 ,4.0 ,- 2.0 ,1 ),
190
+ :inv_fast => InstructionCost (10 ,4.0 ,- 2.0 ,1 ), # FIXME
186
191
# :vinv => InstructionCost(13,4.0,-2.0,1),
187
192
:muladd => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
188
193
:fma => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
189
- # :vmuladd => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
190
- # :vfma => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
194
+ :vmuladd_fast => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
195
+ :vfma_fast => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
191
196
:vfmadd => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
192
197
:vfmsub => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
193
198
:vfnmadd => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
194
199
:vfnmsub => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
200
+ :vfmadd_fast => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
201
+ :vfmsub_fast => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
202
+ :vfnmadd_fast => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
203
+ :vfnmsub_fast => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
195
204
:vfmadd231 => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
196
205
:vfmsub231 => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
197
206
:vfnmadd231 => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
@@ -289,9 +298,15 @@ const REDUCTION_CLASS = Dict{Symbol,Float64}(
289
298
:* => MULTIPLICATIVE_IN_REDUCTIONS,
290
299
:vadd => ADDITIVE_IN_REDUCTIONS,
291
300
:vsub => ADDITIVE_IN_REDUCTIONS,
301
+ :add_fast => ADDITIVE_IN_REDUCTIONS,
302
+ :sub_fast => ADDITIVE_IN_REDUCTIONS,
303
+ :vadd_fast => ADDITIVE_IN_REDUCTIONS,
304
+ :vsub_fast => ADDITIVE_IN_REDUCTIONS,
292
305
# :vadd! => ADDITIVE_IN_REDUCTIONS,
293
306
# :vsub! => ADDITIVE_IN_REDUCTIONS,
294
307
:vmul => MULTIPLICATIVE_IN_REDUCTIONS,
308
+ :mul_fast => MULTIPLICATIVE_IN_REDUCTIONS,
309
+ :vmul_fast => MULTIPLICATIVE_IN_REDUCTIONS,
295
310
# :vmul! => MULTIPLICATIVE_IN_REDUCTIONS,
296
311
# :evadd => ADDITIVE_IN_REDUCTIONS,
297
312
# :evsub => ADDITIVE_IN_REDUCTIONS,
@@ -300,12 +315,16 @@ const REDUCTION_CLASS = Dict{Symbol,Float64}(
300
315
:| => ANY,
301
316
:muladd => ADDITIVE_IN_REDUCTIONS,
302
317
:fma => ADDITIVE_IN_REDUCTIONS,
303
- # :vmuladd => ADDITIVE_IN_REDUCTIONS,
304
- # :vfma => ADDITIVE_IN_REDUCTIONS,
318
+ :vmuladd_fast => ADDITIVE_IN_REDUCTIONS,
319
+ :vfma_fast => ADDITIVE_IN_REDUCTIONS,
305
320
:vfmadd => ADDITIVE_IN_REDUCTIONS,
306
321
:vfmsub => ADDITIVE_IN_REDUCTIONS,
307
322
:vfnmadd => ADDITIVE_IN_REDUCTIONS,
308
323
:vfnmsub => ADDITIVE_IN_REDUCTIONS,
324
+ :vfmadd_fast => ADDITIVE_IN_REDUCTIONS,
325
+ :vfmsub_fast => ADDITIVE_IN_REDUCTIONS,
326
+ :vfnmadd_fast => ADDITIVE_IN_REDUCTIONS,
327
+ :vfnmsub_fast => ADDITIVE_IN_REDUCTIONS,
309
328
:vfmadd231 => ADDITIVE_IN_REDUCTIONS,
310
329
:vfmsub231 => ADDITIVE_IN_REDUCTIONS,
311
330
:vfnmadd231 => ADDITIVE_IN_REDUCTIONS,
@@ -314,22 +333,20 @@ const REDUCTION_CLASS = Dict{Symbol,Float64}(
314
333
# :vfnmadd! => ADDITIVE_IN_REDUCTIONS,
315
334
# :vfmsub! => ADDITIVE_IN_REDUCTIONS,
316
335
# :vfnmsub! => ADDITIVE_IN_REDUCTIONS,
317
- :vfmadd_fast => ADDITIVE_IN_REDUCTIONS,
318
- :vfmsub_fast => ADDITIVE_IN_REDUCTIONS,
319
- :vfnmadd_fast => ADDITIVE_IN_REDUCTIONS,
320
- :vfnmsub_fast => ADDITIVE_IN_REDUCTIONS,
321
336
:reduced_add => ADDITIVE_IN_REDUCTIONS,
322
337
:reduced_prod => MULTIPLICATIVE_IN_REDUCTIONS,
323
338
:reduced_all => ALL,
324
339
:reduced_any => ANY,
325
340
:max => MAX,
326
- :min => MIN
341
+ :min => MIN,
342
+ :max_fast => MAX,
343
+ :min_fast => MIN
327
344
)
328
345
# Look up the reduction class registered for the symbol `instr` in REDUCTION_CLASS.
# Symbols without an entry yield NaN, which compares unequal to every class constant.
reduction_instruction_class (instr:: Symbol ) = get (REDUCTION_CLASS, instr, NaN )
329
346
# Instruction method: forwards the `instr` field of the Instruction to reduction_instruction_class.
reduction_instruction_class (instr:: Instruction ) = reduction_instruction_class (instr. instr)
330
347
# Translate a reduction-class value `x` into an operation symbol:
#   ADDITIVE_IN_REDUCTIONS        -> :+   (the removed line returned :vadd)
#   MULTIPLICATIVE_IN_REDUCTIONS  -> :*   (the removed line returned :vmul)
#   MAX                           -> :max
#   MIN                           -> :min
# Any other value (including NaN, which never compares equal) throws the
# string " Reduction not found.".
# NOTE(review): REDUCTION_CLASS also contains ALL and ANY entries; they are not handled
# here and would reach the throw -- confirm that is intended.
function reduction_to_single_vector (x:: Float64 )
331
348
# x == 1.0 ? :evadd : x == 2.0 ? :evmul : x == 3.0 ? :vor : x == 4.0 ? :vand : x == 5.0 ? :max : x == 6.0 ? :min : throw("Reduction not found.")
332
- x == ADDITIVE_IN_REDUCTIONS ? :vadd : x == MULTIPLICATIVE_IN_REDUCTIONS ? :vmul : x == MAX ? :max : x == MIN ? :min : throw (" Reduction not found." )
349
+ x == ADDITIVE_IN_REDUCTIONS ? :( + ) : x == MULTIPLICATIVE_IN_REDUCTIONS ? :( * ) : x == MAX ? :max : x == MIN ? :min : throw (" Reduction not found." )
333
350
end
334
351
# Generic fallback: classify `x` with reduction_instruction_class, then pass that
# result back into reduction_to_single_vector.
reduction_to_single_vector (x) = reduction_to_single_vector (reduction_instruction_class (x))
335
352
# function reduction_to_scalar(x::Float64)
@@ -366,21 +383,22 @@ const FUNCTIONSYMBOLS = IdDict{Type{<:Function},Instruction}(
366
383
typeof (+ ) => :(+ ),
367
384
typeof (VectorizationBase. vadd) => :(+ ),
368
385
# typeof(VectorizationBase.vadd!) => :(+),
369
- typeof (Base. FastMath. add_fast) => :( + ) ,
386
+ typeof (Base. FastMath. add_fast) => :add_fast ,
370
387
typeof (- ) => :(- ),
371
388
typeof (VectorizationBase. vsub) => :(- ),
372
389
# typeof(VectorizationBase.vsub!) => :(-),
373
- typeof (Base. FastMath. sub_fast) => :( - ) ,
390
+ typeof (Base. FastMath. sub_fast) => :sub_fast ,
374
391
typeof (* ) => :(* ),
375
392
typeof (VectorizationBase. vmul) => :(* ),
376
393
# typeof(VectorizationBase.vmul!) => :(*),
377
- typeof (Base. FastMath. mul_fast) => :( * ) ,
394
+ typeof (Base. FastMath. mul_fast) => :mul_fast ,
378
395
typeof (/ ) => :(/ ),
379
396
typeof (^ ) => :(^ ),
380
397
# typeof(VectorizationBase.vfdiv) => :(/),
381
398
# typeof(VectorizationBase.vfdiv!) => :(/),
382
399
typeof (VectorizationBase. vdiv) => :(/ ),
383
- typeof (Base. FastMath. div_fast) => :(/ ),
400
+ typeof (Base. FastMath. div_fast) => :div_fast ,
401
+ typeof (Base. FastMath. rem_fast) => :rem_fast ,
384
402
typeof (== ) => :(== ),
385
403
typeof (!= ) => :(!= ),
386
404
typeof (isequal) => :isequal ,
@@ -389,6 +407,7 @@ const FUNCTIONSYMBOLS = IdDict{Type{<:Function},Instruction}(
389
407
typeof (isfinite) => :isfinite ,
390
408
typeof (abs) => :abs ,
391
409
typeof (abs2) => :abs2 ,
410
+ typeof (abs2_fast) => :abs2_fast ,
392
411
typeof (~ ) => :(~ ),
393
412
typeof (! ) => :(! ),
394
413
typeof (& ) => :(& ),
@@ -399,14 +418,19 @@ const FUNCTIONSYMBOLS = IdDict{Type{<:Function},Instruction}(
399
418
typeof (>= ) => :(>= ),
400
419
typeof (<= ) => :(<= ),
401
420
typeof (inv) => :inv ,
421
+ typeof (inv_fast) => :inv_fast ,
402
422
typeof (muladd) => :muladd ,
403
423
typeof (fma) => :fma ,
404
- # typeof(VectorizationBase.vmuladd) => :vmuladd,
405
- # typeof(VectorizationBase.vfma) => :vfma,
406
- typeof (VectorizationBase. vfmadd) => :vfmadd ,
424
+ typeof (VectorizationBase. vfma) => :vfma ,
425
+ typeof (VectorizationBase. vmuladd) => :vmuladd ,
407
426
typeof (VectorizationBase. vfmsub) => :vfmsub ,
408
427
typeof (VectorizationBase. vfnmadd) => :vfnmadd ,
409
428
typeof (VectorizationBase. vfnmsub) => :vfnmsub ,
429
+ typeof (VectorizationBase. vfma_fast) => :vfma_fast ,
430
+ typeof (VectorizationBase. vmuladd_fast) => :vmuladd_fast ,
431
+ typeof (VectorizationBase. vfmsub_fast) => :vfmsub_fast ,
432
+ typeof (VectorizationBase. vfnmadd_fast) => :vfnmadd_fast ,
433
+ typeof (VectorizationBase. vfnmsub_fast) => :vfnmsub_fast ,
410
434
typeof (VectorizationBase. vfmadd231) => :vfmadd231 ,
411
435
typeof (VectorizationBase. vfmsub231) => :vfmsub231 ,
412
436
typeof (VectorizationBase. vfnmadd231) => :vfnmadd231 ,
@@ -454,6 +478,8 @@ const FUNCTIONSYMBOLS = IdDict{Type{<:Function},Instruction}(
454
478
# typeof(SLEEFPirates.tanh_fast) => :tanh_fast,
455
479
typeof (max) => :max ,
456
480
typeof (min) => :min ,
481
+ typeof (max_fast) => :max_fast ,
482
+ typeof (min_fast) => :min_fast ,
457
483
typeof (relu) => :relu ,
458
484
typeof (<< ) => :<< ,
459
485
typeof (>> ) => :>> ,
0 commit comments