Skip to content

Commit dd43ed9

Browse files
committed
Address feedback
1 parent 2879d78 commit dd43ed9

File tree

3 files changed

+31
-42
lines changed

3 files changed

+31
-42
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,9 +1525,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
15251525
for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})
15261526
setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Custom);
15271527

1528-
for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v2f64}) {
1528+
for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v2f64})
15291529
setOperationAction(ISD::FMA, VT, Custom);
1530-
}
15311530
}
15321531

15331532
if (Subtarget->isSVEorStreamingSVEAvailable()) {
@@ -7743,7 +7742,10 @@ SDValue AArch64TargetLowering::LowerFMA(SDValue Op, SelectionDAG &DAG) const {
77437742

77447743
// Bail early if we're definitely not looking to merge FNEGs into the FMA.
77457744
if (!VT.isFixedLengthVector() || OpC.getOpcode() != ISD::FNEG) {
7746-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
7745+
if (VT.isScalableVector() || VT.getScalarType() == MVT::bf16 ||
7746+
useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
7747+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
7748+
return Op; // Fall back to NEON lowering.
77477749
}
77487750

77497751
// Convert FMA/FNEG nodes to SVE to enable the following patterns:
@@ -7753,13 +7755,18 @@ SDValue AArch64TargetLowering::LowerFMA(SDValue Op, SelectionDAG &DAG) const {
77537755
SDValue Pg = getPredicateForVector(DAG, DL, VT);
77547756
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
77557757

7756-
for (SDValue *Op : {&OpA, &OpB, &OpC}) {
7757-
// Reuse `LowerToPredicatedOp` but drop the subsequent `extract_subvector`
7758-
*Op = Op->getOpcode() == ISD::FNEG
7759-
? LowerToPredicatedOp(*Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU)
7760-
->getOperand(0)
7761-
: convertToScalableVector(DAG, ContainerVT, *Op);
7762-
}
7758+
// Reuse `LowerToPredicatedOp` but drop the subsequent `extract_subvector`
7759+
OpA = OpA.getOpcode() == ISD::FNEG
7760+
? LowerToPredicatedOp(OpA, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU)
7761+
->getOperand(0)
7762+
: convertToScalableVector(DAG, ContainerVT, OpA);
7763+
OpB = OpB.getOpcode() == ISD::FNEG
7764+
? LowerToPredicatedOp(OpB, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU)
7765+
->getOperand(0)
7766+
: convertToScalableVector(DAG, ContainerVT, OpB);
7767+
OpC = LowerToPredicatedOp(OpC, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU)
7768+
->getOperand(0);
7769+
77637770
SDValue ScalableRes =
77647771
DAG.getNode(AArch64ISD::FMA_PRED, DL, ContainerVT, Pg, OpA, OpB, OpC);
77657772
return convertFromScalableVector(DAG, VT, ScalableRes);

llvm/test/CodeGen/AArch64/complex-deinterleaving-symmetric-fixed.ll

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,13 @@ define <4 x double> @simple_symmetric_muladd2(<4 x double> %a, <4 x double> %b)
77
; CHECK-LABEL: simple_symmetric_muladd2:
88
; CHECK: // %bb.0: // %entry
99
; CHECK-NEXT: mov x8, #-7378697629483820647 // =0x9999999999999999
10-
; CHECK-NEXT: ptrue p0.d, vl2
11-
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
12-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
13-
; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
14-
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
1510
; CHECK-NEXT: movk x8, #39322
1611
; CHECK-NEXT: movk x8, #16393, lsl #48
1712
; CHECK-NEXT: dup v4.2d, x8
18-
; CHECK-NEXT: fmad z0.d, p0/m, z4.d, z2.d
19-
; CHECK-NEXT: fmad z1.d, p0/m, z4.d, z3.d
20-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
21-
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
13+
; CHECK-NEXT: fmla v2.2d, v4.2d, v0.2d
14+
; CHECK-NEXT: fmla v3.2d, v4.2d, v1.2d
15+
; CHECK-NEXT: mov v0.16b, v2.16b
16+
; CHECK-NEXT: mov v1.16b, v3.16b
2217
; CHECK-NEXT: ret
2318
entry:
2419
%ext00 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
@@ -48,11 +43,10 @@ define <8 x double> @simple_symmetric_muladd4(<8 x double> %a, <8 x double> %b)
4843
; CHECK-NEXT: zip1 v17.2d, v5.2d, v7.2d
4944
; CHECK-NEXT: zip2 v5.2d, v5.2d, v7.2d
5045
; CHECK-NEXT: dup v6.2d, x8
51-
; CHECK-NEXT: ptrue p0.d, vl2
52-
; CHECK-NEXT: fmla z3.d, p0/m, z16.d, z6.d
53-
; CHECK-NEXT: fmla z4.d, p0/m, z0.d, z6.d
54-
; CHECK-NEXT: fmla z17.d, p0/m, z2.d, z6.d
55-
; CHECK-NEXT: fmla z5.d, p0/m, z1.d, z6.d
46+
; CHECK-NEXT: fmla v3.2d, v6.2d, v16.2d
47+
; CHECK-NEXT: fmla v4.2d, v6.2d, v0.2d
48+
; CHECK-NEXT: fmla v17.2d, v6.2d, v2.2d
49+
; CHECK-NEXT: fmla v5.2d, v6.2d, v1.2d
5650
; CHECK-NEXT: zip1 v0.2d, v3.2d, v4.2d
5751
; CHECK-NEXT: zip2 v2.2d, v3.2d, v4.2d
5852
; CHECK-NEXT: zip1 v1.2d, v17.2d, v5.2d

llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -620,12 +620,8 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
620620
define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) vscale_range(2,0) #0 {
621621
; CHECK-LABEL: fma_v8f16:
622622
; CHECK: // %bb.0:
623-
; CHECK-NEXT: ptrue p0.h, vl8
624-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
625-
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
626-
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
627-
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
628-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
623+
; CHECK-NEXT: fmla v2.8h, v1.8h, v0.8h
624+
; CHECK-NEXT: mov v0.16b, v2.16b
629625
; CHECK-NEXT: ret
630626
%res = call <8 x half> @llvm.fma.v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
631627
ret <8 x half> %res
@@ -734,12 +730,8 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
734730
define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) vscale_range(2,0) #0 {
735731
; CHECK-LABEL: fma_v4f32:
736732
; CHECK: // %bb.0:
737-
; CHECK-NEXT: ptrue p0.s, vl4
738-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
739-
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
740-
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
741-
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
742-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
733+
; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
734+
; CHECK-NEXT: mov v0.16b, v2.16b
743735
; CHECK-NEXT: ret
744736
%res = call <4 x float> @llvm.fma.v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3)
745737
ret <4 x float> %res
@@ -847,12 +839,8 @@ define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double
847839
define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) vscale_range(2,0) #0 {
848840
; CHECK-LABEL: fma_v2f64:
849841
; CHECK: // %bb.0:
850-
; CHECK-NEXT: ptrue p0.d, vl2
851-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
852-
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
853-
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
854-
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
855-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
842+
; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d
843+
; CHECK-NEXT: mov v0.16b, v2.16b
856844
; CHECK-NEXT: ret
857845
%res = call <2 x double> @llvm.fma.v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3)
858846
ret <2 x double> %res

0 commit comments

Comments
 (0)