Skip to content

Commit 3f3d522

Browse files
authored
[PowerPC] recognize vnmsubfp in older ppc versions (#155465)
Fixes #129432. Recognize the expansion sequence of fneg, on targets where fneg isn't legal, in order to select the negated multiply-subtract instruction (vnmsubfp).
1 parent c52de9a commit 3f3d522

File tree

2 files changed

+52
-3
lines changed

2 files changed

+52
-3
lines changed

llvm/lib/Target/PowerPC/PPCInstrAltivec.td

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@
3030
// Altivec transformation functions and pattern fragments.
3131
//
3232

33+
// fneg is not legal; it is desugared as an xor with a mask built by vslw of two all-ones vectors.
34+
def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x),
35+
(int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)),
36+
(bitconvert (v16i8 immAllOnesV))))))>;
37+
3338
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
3439
(vector_shuffle node:$lhs, node:$rhs), [{
3540
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
@@ -467,11 +472,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
467472
[(set v4f32:$RT,
468473
(fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>;
469474

470-
// FIXME: The fma+fneg pattern won't match because fneg is not legal.
475+
// fneg is not legal, hence we have to match on the desugared version.
471476
def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
472477
"vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP,
473-
[(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC,
474-
(fneg v4f32:$RB))))]>;
478+
[(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC,
479+
(desugared_fneg v4f32:$RB))))]>;
480+
475481
let hasSideEffects = 1 in {
476482
def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
477483
def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
@@ -892,6 +898,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
892898
// Add
893899
def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
894900

901+
902+
// Fused negated multiply-subtract
903+
def : Pat<(v4f32 (desugared_fneg
904+
(int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC,
905+
(desugared_fneg v4f32:$RB)))),
906+
(VNMSUBFP $RA, $RC, $RB)>;
907+
895908
// Saturating adds/subtracts.
896909
def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
897910
def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -verify-machineinstrs < %s -mcpu=pwr5 -mtriple=ppc32-- -mattr=+altivec | FileCheck %s
3+
4+
define dso_local <4 x float> @intrinsic(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr {
5+
; CHECK-LABEL: intrinsic:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
8+
; CHECK-NEXT: blr
9+
entry:
10+
%0 = tail call <4 x float> @llvm.ppc.altivec.vnmsubfp(<4 x float> %a, <4 x float> %b, <4 x float> %c)
11+
ret <4 x float> %0
12+
}
13+
14+
define <4 x float> @manual_llvm_fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) unnamed_addr {
15+
; CHECK-LABEL: manual_llvm_fma:
16+
; CHECK: # %bb.0: # %start
17+
; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
18+
; CHECK-NEXT: blr
19+
start:
20+
%0 = fneg <4 x float> %c
21+
%1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %0)
22+
%2 = fneg <4 x float> %1
23+
ret <4 x float> %2
24+
}
25+
26+
define dso_local <4 x float> @manual_vmaddfp(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr {
27+
; CHECK-LABEL: manual_vmaddfp:
28+
; CHECK: # %bb.0: # %entry
29+
; CHECK-NEXT: vnmsubfp 2, 2, 3, 4
30+
; CHECK-NEXT: blr
31+
entry:
32+
%fneg.i3 = fneg <4 x float> %c
33+
%0 = tail call <4 x float> @llvm.ppc.altivec.vmaddfp(<4 x float> %a, <4 x float> %b, <4 x float> %fneg.i3)
34+
%fneg.i = fneg <4 x float> %0
35+
ret <4 x float> %fneg.i
36+
}

0 commit comments

Comments
 (0)