|
30 | 30 | // Altivec transformation functions and pattern fragments.
|
31 | 31 | //
|
32 | 32 |
|
| 33 | +// fneg is not legal here, so it is desugared into an xor; match that xor form. |
// Pattern fragment taking one operand $x and producing a v4f32: $x is
// bit-converted, xor'ed with the value of
// int_ppc_altivec_vslw(all-ones, all-ones), and bit-converted back to v4f32.
// Both vslw operands are v16i8 all-ones vectors that are bit-converted first.
// NOTE(review): vslw of all-ones by all-ones presumably yields the per-word
// sign-bit mask (0x80000000) — confirm against the vslw definition.
| 34 | +def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x), |
| 35 | + (int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)), |
| 36 | + (bitconvert (v16i8 immAllOnesV))))))>; |
| 37 | + |
33 | 38 | def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
34 | 39 | (vector_shuffle node:$lhs, node:$rhs), [{
|
35 | 40 | return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
|
@@ -467,11 +472,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
|
467 | 472 | [(set v4f32:$RT,
|
468 | 473 | (fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>;
|
469 | 474 |
|
470 |
| -// FIXME: The fma+fneg pattern won't match because fneg is not legal. |
| 475 | +// fneg is not legal, so the pattern must match its desugared form instead. |
// VNMSUBFP: VAForm_1<47, ...> instruction with output $RT and inputs
// $RA, $RC, $RB, printed as "vnmsubfp $RT, $RA, $RC, $RB".
// This hunk swaps the selection pattern: the removed ('-') lines used
// fneg(fma(RA, RC, fneg(RB))); the added ('+') lines keep the same shape but
// use desugared_fneg for both the inner and outer negation.
471 | 476 | def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
|
472 | 477 | "vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP,
|
473 |
| - [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC, |
474 |
| - (fneg v4f32:$RB))))]>; |
| 478 | + [(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC, |
| 479 | + (desugared_fneg v4f32:$RB))))]>; |
| 480 | + |
475 | 481 | let hasSideEffects = 1 in {
|
476 | 482 | def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
|
477 | 483 | def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
|
@@ -892,6 +898,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
|
892 | 898 | // Add
|
// Selects a v8i16 multiply whose result is added to $vC into a single
// VMLADDUHM, passing $vA, $vB and $vC as its three operands.
893 | 899 | def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
|
894 | 900 |
|
| 901 | + |
| 902 | +// Fused negated multiply-subtract: also match the vmaddfp intrinsic form. |
// Selects VNMSUBFP $RA, $RC, $RB when the int_ppc_altivec_vmaddfp intrinsic
// appears in the shape desugared_fneg(vmaddfp(RA, RC, desugared_fneg(RB))),
// i.e. the same nesting the VNMSUBFP definition uses with fma.
| 903 | +def : Pat<(v4f32 (desugared_fneg |
| 904 | + (int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC, |
| 905 | + (desugared_fneg v4f32:$RB)))), |
| 906 | + (VNMSUBFP $RA, $RC, $RB)>; |
| 907 | + |
895 | 908 | // Saturating adds/subtracts.
|
// v16i8 saturating adds: the signed form (saddsat) selects VADDSBS and the
// unsigned form (uaddsat) selects VADDUBS, each with operands $vA, $vB.
896 | 909 | def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
|
897 | 910 | def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
|
|
0 commit comments