@@ -83,7 +83,11 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
83
83
#[ cfg_attr( test, assert_instr( vfmadd) ) ]
84
84
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
85
85
pub unsafe fn _mm_fmadd_sd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d {
86
- vfmaddsd ( a, b, c)
86
+ simd_insert ! (
87
+ a,
88
+ 0 ,
89
+ _mm_cvtsd_f64( a) . mul_add( _mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
90
+ )
87
91
}
88
92
89
93
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -97,7 +101,11 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
97
101
#[ cfg_attr( test, assert_instr( vfmadd) ) ]
98
102
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
99
103
pub unsafe fn _mm_fmadd_ss ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 {
100
- vfmaddss ( a, b, c)
104
+ simd_insert ! (
105
+ a,
106
+ 0 ,
107
+ _mm_cvtss_f32( a) . mul_add( _mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
108
+ )
101
109
}
102
110
103
111
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -161,7 +169,7 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
161
169
#[ cfg_attr( test, assert_instr( vfmsub) ) ]
162
170
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
163
171
pub unsafe fn _mm_fmsub_pd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d {
164
- vfmsubpd ( a, b, c )
172
+ simd_fma ( a, b, simd_neg ( c ) )
165
173
}
166
174
167
175
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -173,7 +181,7 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
173
181
#[ cfg_attr( test, assert_instr( vfmsub) ) ]
174
182
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
175
183
pub unsafe fn _mm256_fmsub_pd ( a : __m256d , b : __m256d , c : __m256d ) -> __m256d {
176
- vfmsubpd256 ( a, b, c )
184
+ simd_fma ( a, b, simd_neg ( c ) )
177
185
}
178
186
179
187
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -185,7 +193,7 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
185
193
#[ cfg_attr( test, assert_instr( vfmsub213ps) ) ]
186
194
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
187
195
pub unsafe fn _mm_fmsub_ps ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 {
188
- vfmsubps ( a, b, c )
196
+ simd_fma ( a, b, simd_neg ( c ) )
189
197
}
190
198
191
199
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -197,7 +205,7 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
197
205
#[ cfg_attr( test, assert_instr( vfmsub213ps) ) ]
198
206
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
199
207
pub unsafe fn _mm256_fmsub_ps ( a : __m256 , b : __m256 , c : __m256 ) -> __m256 {
200
- vfmsubps256 ( a, b, c )
208
+ simd_fma ( a, b, simd_neg ( c ) )
201
209
}
202
210
203
211
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -211,7 +219,11 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
211
219
#[ cfg_attr( test, assert_instr( vfmsub) ) ]
212
220
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
213
221
pub unsafe fn _mm_fmsub_sd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d {
214
- vfmsubsd ( a, b, c)
222
+ simd_insert ! (
223
+ a,
224
+ 0 ,
225
+ _mm_cvtsd_f64( a) . mul_add( _mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
226
+ )
215
227
}
216
228
217
229
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -225,7 +237,11 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
225
237
#[ cfg_attr( test, assert_instr( vfmsub) ) ]
226
238
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
227
239
pub unsafe fn _mm_fmsub_ss ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 {
228
- vfmsubss ( a, b, c)
240
+ simd_insert ! (
241
+ a,
242
+ 0 ,
243
+ _mm_cvtss_f32( a) . mul_add( _mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
244
+ )
229
245
}
230
246
231
247
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -289,7 +305,7 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
289
305
#[ cfg_attr( test, assert_instr( vfnmadd) ) ]
290
306
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
291
307
pub unsafe fn _mm_fnmadd_pd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d {
292
- vfnmaddpd ( a , b, c)
308
+ simd_fma ( simd_neg ( a ) , b, c)
293
309
}
294
310
295
311
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -301,7 +317,7 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
301
317
#[ cfg_attr( test, assert_instr( vfnmadd) ) ]
302
318
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
303
319
pub unsafe fn _mm256_fnmadd_pd ( a : __m256d , b : __m256d , c : __m256d ) -> __m256d {
304
- vfnmaddpd256 ( a , b, c)
320
+ simd_fma ( simd_neg ( a ) , b, c)
305
321
}
306
322
307
323
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -313,7 +329,7 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
313
329
#[ cfg_attr( test, assert_instr( vfnmadd) ) ]
314
330
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
315
331
pub unsafe fn _mm_fnmadd_ps ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 {
316
- vfnmaddps ( a , b, c)
332
+ simd_fma ( simd_neg ( a ) , b, c)
317
333
}
318
334
319
335
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -325,7 +341,7 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
325
341
#[ cfg_attr( test, assert_instr( vfnmadd) ) ]
326
342
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
327
343
pub unsafe fn _mm256_fnmadd_ps ( a : __m256 , b : __m256 , c : __m256 ) -> __m256 {
328
- vfnmaddps256 ( a , b, c)
344
+ simd_fma ( simd_neg ( a ) , b, c)
329
345
}
330
346
331
347
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -339,7 +355,11 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
339
355
#[ cfg_attr( test, assert_instr( vfnmadd) ) ]
340
356
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
341
357
pub unsafe fn _mm_fnmadd_sd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d {
342
- vfnmaddsd ( a, b, c)
358
+ simd_insert ! (
359
+ a,
360
+ 0 ,
361
+ _mm_cvtsd_f64( a) . mul_add( -_mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
362
+ )
343
363
}
344
364
345
365
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -353,7 +373,11 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
353
373
#[ cfg_attr( test, assert_instr( vfnmadd) ) ]
354
374
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
355
375
pub unsafe fn _mm_fnmadd_ss ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 {
356
- vfnmaddss ( a, b, c)
376
+ simd_insert ! (
377
+ a,
378
+ 0 ,
379
+ _mm_cvtss_f32( a) . mul_add( -_mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
380
+ )
357
381
}
358
382
359
383
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -366,7 +390,7 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
366
390
#[ cfg_attr( test, assert_instr( vfnmsub) ) ]
367
391
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
368
392
pub unsafe fn _mm_fnmsub_pd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d {
369
- vfnmsubpd ( a , b, c )
393
+ simd_fma ( simd_neg ( a ) , b, simd_neg ( c ) )
370
394
}
371
395
372
396
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -379,7 +403,7 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
379
403
#[ cfg_attr( test, assert_instr( vfnmsub) ) ]
380
404
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
381
405
pub unsafe fn _mm256_fnmsub_pd ( a : __m256d , b : __m256d , c : __m256d ) -> __m256d {
382
- vfnmsubpd256 ( a , b, c )
406
+ simd_fma ( simd_neg ( a ) , b, simd_neg ( c ) )
383
407
}
384
408
385
409
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -392,7 +416,7 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
392
416
#[ cfg_attr( test, assert_instr( vfnmsub) ) ]
393
417
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
394
418
pub unsafe fn _mm_fnmsub_ps ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 {
395
- vfnmsubps ( a , b, c )
419
+ simd_fma ( simd_neg ( a ) , b, simd_neg ( c ) )
396
420
}
397
421
398
422
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -405,7 +429,7 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
405
429
#[ cfg_attr( test, assert_instr( vfnmsub) ) ]
406
430
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
407
431
pub unsafe fn _mm256_fnmsub_ps ( a : __m256 , b : __m256 , c : __m256 ) -> __m256 {
408
- vfnmsubps256 ( a , b, c )
432
+ simd_fma ( simd_neg ( a ) , b, simd_neg ( c ) )
409
433
}
410
434
411
435
/// Multiplies the lower double-precision (64-bit) floating-point elements in
@@ -420,7 +444,11 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
420
444
#[ cfg_attr( test, assert_instr( vfnmsub) ) ]
421
445
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
422
446
pub unsafe fn _mm_fnmsub_sd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d {
423
- vfnmsubsd ( a, b, c)
447
+ simd_insert ! (
448
+ a,
449
+ 0 ,
450
+ _mm_cvtsd_f64( a) . mul_add( -_mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
451
+ )
424
452
}
425
453
426
454
/// Multiplies the lower single-precision (32-bit) floating-point elements in
@@ -435,15 +463,15 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
435
463
#[ cfg_attr( test, assert_instr( vfnmsub) ) ]
436
464
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
437
465
pub unsafe fn _mm_fnmsub_ss ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 {
438
- vfnmsubss ( a, b, c)
466
+ simd_insert ! (
467
+ a,
468
+ 0 ,
469
+ _mm_cvtss_f32( a) . mul_add( -_mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
470
+ )
439
471
}
440
472
441
473
#[ allow( improper_ctypes) ]
442
474
extern "C" {
443
- #[ link_name = "llvm.x86.fma.vfmadd.sd" ]
444
- fn vfmaddsd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
445
- #[ link_name = "llvm.x86.fma.vfmadd.ss" ]
446
- fn vfmaddss ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
447
475
#[ link_name = "llvm.x86.fma.vfmaddsub.pd" ]
448
476
fn vfmaddsubpd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
449
477
#[ link_name = "llvm.x86.fma.vfmaddsub.pd.256" ]
@@ -452,18 +480,6 @@ extern "C" {
452
480
fn vfmaddsubps ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
453
481
#[ link_name = "llvm.x86.fma.vfmaddsub.ps.256" ]
454
482
fn vfmaddsubps256 ( a : __m256 , b : __m256 , c : __m256 ) -> __m256 ;
455
- #[ link_name = "llvm.x86.fma.vfmsub.pd" ]
456
- fn vfmsubpd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
457
- #[ link_name = "llvm.x86.fma.vfmsub.pd.256" ]
458
- fn vfmsubpd256 ( a : __m256d , b : __m256d , c : __m256d ) -> __m256d ;
459
- #[ link_name = "llvm.x86.fma.vfmsub.ps" ]
460
- fn vfmsubps ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
461
- #[ link_name = "llvm.x86.fma.vfmsub.ps.256" ]
462
- fn vfmsubps256 ( a : __m256 , b : __m256 , c : __m256 ) -> __m256 ;
463
- #[ link_name = "llvm.x86.fma.vfmsub.sd" ]
464
- fn vfmsubsd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
465
- #[ link_name = "llvm.x86.fma.vfmsub.ss" ]
466
- fn vfmsubss ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
467
483
#[ link_name = "llvm.x86.fma.vfmsubadd.pd" ]
468
484
fn vfmsubaddpd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
469
485
#[ link_name = "llvm.x86.fma.vfmsubadd.pd.256" ]
@@ -472,30 +488,6 @@ extern "C" {
472
488
fn vfmsubaddps ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
473
489
#[ link_name = "llvm.x86.fma.vfmsubadd.ps.256" ]
474
490
fn vfmsubaddps256 ( a : __m256 , b : __m256 , c : __m256 ) -> __m256 ;
475
- #[ link_name = "llvm.x86.fma.vfnmadd.pd" ]
476
- fn vfnmaddpd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
477
- #[ link_name = "llvm.x86.fma.vfnmadd.pd.256" ]
478
- fn vfnmaddpd256 ( a : __m256d , b : __m256d , c : __m256d ) -> __m256d ;
479
- #[ link_name = "llvm.x86.fma.vfnmadd.ps" ]
480
- fn vfnmaddps ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
481
- #[ link_name = "llvm.x86.fma.vfnmadd.ps.256" ]
482
- fn vfnmaddps256 ( a : __m256 , b : __m256 , c : __m256 ) -> __m256 ;
483
- #[ link_name = "llvm.x86.fma.vfnmadd.sd" ]
484
- fn vfnmaddsd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
485
- #[ link_name = "llvm.x86.fma.vfnmadd.ss" ]
486
- fn vfnmaddss ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
487
- #[ link_name = "llvm.x86.fma.vfnmsub.pd" ]
488
- fn vfnmsubpd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
489
- #[ link_name = "llvm.x86.fma.vfnmsub.pd.256" ]
490
- fn vfnmsubpd256 ( a : __m256d , b : __m256d , c : __m256d ) -> __m256d ;
491
- #[ link_name = "llvm.x86.fma.vfnmsub.ps" ]
492
- fn vfnmsubps ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
493
- #[ link_name = "llvm.x86.fma.vfnmsub.ps.256" ]
494
- fn vfnmsubps256 ( a : __m256 , b : __m256 , c : __m256 ) -> __m256 ;
495
- #[ link_name = "llvm.x86.fma.vfnmsub.sd" ]
496
- fn vfnmsubsd ( a : __m128d , b : __m128d , c : __m128d ) -> __m128d ;
497
- #[ link_name = "llvm.x86.fma.vfnmsub.ss" ]
498
- fn vfnmsubss ( a : __m128 , b : __m128 , c : __m128 ) -> __m128 ;
499
491
}
500
492
501
493
#[ cfg( test) ]
0 commit comments