Commit ba0e9ae
Combine vector FNEG+FMA into FNML[A|S]
This allows FNEG + FMA sequences to be combined into a single operation, with `FNML[A|S]`, `FNMAD`, or `FNMSB` selected depending on the operand order.
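As exercised by the new tests below, a vector `fneg` feeding the accumulator operand of `llvm.fmuladd` now folds into one predicated instruction (a minimal sketch of the scalable-vector case, with the register assignment taken from the test output):

  ; fma(a, b, neg(c)) -> a single negated multiply-add
  %neg = fneg <vscale x 2 x double> %c
  %0 = tail call <vscale x 2 x double> @llvm.fmuladd(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %neg)
  ; selected code: fnmsb z0.d, p0/m, z1.d, z2.d

When the negated accumulator rather than a multiplicand lands in the destination register, the same pattern selects `fnmls` instead (see the `*_flipped` tests).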
1 parent 70f4b59 commit ba0e9ae

File tree

3 files changed: +332 −2 lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 50 additions & 0 deletions
@@ -1176,6 +1176,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::VECTOR_DEINTERLEAVE);
   setTargetDAGCombine(ISD::CTPOP);

+  setTargetDAGCombine(ISD::FMA);
+
   // In case of strict alignment, avoid an excessive number of byte wide stores.
   MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemset =
@@ -20435,6 +20437,52 @@ static SDValue performFADDCombine(SDNode *N,
   return SDValue();
 }

+static SDValue performFMACombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const AArch64Subtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Op1 = N->getOperand(0);
+  SDValue Op2 = N->getOperand(1);
+  SDValue Op3 = N->getOperand(2);
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // fma(a, b, neg(c)) -> fnmls(a, b, c)
+  // fma(neg(a), b, neg(c)) -> fnmla(a, b, c)
+  // fma(a, neg(b), neg(c)) -> fnmla(a, b, c)
+  if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+      (Subtarget->hasSVE() || Subtarget->hasSME())) {
+    if (Op3.getOpcode() == ISD::FNEG) {
+      unsigned int Opcode;
+      if (Op1.getOpcode() == ISD::FNEG) {
+        Op1 = Op1.getOperand(0);
+        Opcode = AArch64ISD::FNMLA_PRED;
+      } else if (Op2.getOpcode() == ISD::FNEG) {
+        Op2 = Op2.getOperand(0);
+        Opcode = AArch64ISD::FNMLA_PRED;
+      } else {
+        Opcode = AArch64ISD::FNMLS_PRED;
+      }
+      Op3 = Op3.getOperand(0);
+      auto Pg = getPredicateForVector(DAG, DL, VT);
+      if (VT.isFixedLengthVector()) {
+        assert(DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+               "Expected only legal fixed-width types");
+        EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+        Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
+        Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
+        Op3 = convertToScalableVector(DAG, ContainerVT, Op3);
+        auto ScalableRes =
+            DAG.getNode(Opcode, DL, ContainerVT, Pg, Op1, Op2, Op3);
+        return convertFromScalableVector(DAG, VT, ScalableRes);
+      }
+      return DAG.getNode(Opcode, DL, VT, Pg, Op1, Op2, Op3);
+    }
+  }
+
+  return SDValue();
+}
+
 static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
   switch (Opcode) {
   case ISD::STRICT_FADD:
@@ -27958,6 +28006,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performANDCombine(N, DCI);
   case ISD::FADD:
     return performFADDCombine(N, DCI);
+  case ISD::FMA:
+    return performFMACombine(N, DCI, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN:
     return performIntrinsicCombine(N, DCI, Subtarget);
   case ISD::ANY_EXTEND:

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 6 additions & 2 deletions
@@ -240,6 +240,8 @@ def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
 def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
 def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
 def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>;
+def AArch64fnmla_p_node : SDNode<"AArch64ISD::FNMLA_PRED", SDT_AArch64FMA>;
+def AArch64fnmls_p_node : SDNode<"AArch64ISD::FNMLS_PRED", SDT_AArch64FMA>;

 def AArch64fadd_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3),
                                      (AArch64fadd_p node:$op1, node:$op2, node:$op3), [{
@@ -460,12 +462,14 @@ def AArch64fmlsidx : PatFrags<(ops node:$acc, node:$op1, node:$op2, node:$idx),


 def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
-                              [(int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm),
+                              [(AArch64fnmla_p_node node:$pg, node:$zn, node:$zm, node:$za),
+                               (int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm),
                                (AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))),
                                (AArch64fneg_mt_nsz node:$pg, (AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za), (undef))]>;

 def AArch64fnmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
-                              [(int_aarch64_sve_fnmls_u node:$pg, node:$za, node:$zn, node:$zm),
+                              [(AArch64fnmls_p_node node:$pg, node:$zn, node:$zm, node:$za),
+                               (int_aarch64_sve_fnmls_u node:$pg, node:$za, node:$zn, node:$zm),
                                (AArch64fma_p node:$pg, node:$zn, node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef)))]>;

 def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
Lines changed: 276 additions & 0 deletions
@@ -0,0 +1,276 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=aarch64 -mattr=+v9a,+sve2,+crypto,+bf16,+sm4,+i8mm,+sve2-bitperm,+sve2-sha3,+sve2-aes,+sve2-sm4 %s -o - | FileCheck %s --check-prefixes=CHECK

define <vscale x 2 x double> @fmsub_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmsub_nxv2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmsb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
entry:
  %neg = fneg <vscale x 2 x double> %c
  %0 = tail call <vscale x 2 x double> @llvm.fmuladd(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %neg)
  ret <vscale x 2 x double> %0
}

define <vscale x 4 x float> @fmsub_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmsub_nxv4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmsb z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
entry:
  %neg = fneg <vscale x 4 x float> %c
  %0 = tail call <vscale x 4 x float> @llvm.fmuladd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %neg)
  ret <vscale x 4 x float> %0
}

define <vscale x 8 x half> @fmsub_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmsub_nxv8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fnmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
entry:
  %neg = fneg <vscale x 8 x half> %c
  %0 = tail call <vscale x 8 x half> @llvm.fmuladd(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %neg)
  ret <vscale x 8 x half> %0
}

define <2 x double> @fmsub_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fmsub_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmsb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <2 x double> %c
  %0 = tail call <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %neg)
  ret <2 x double> %0
}

define <4 x float> @fmsub_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fmsub_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmsb z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <4 x float> %c
  %0 = tail call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %neg)
  ret <4 x float> %0
}

define <8 x half> @fmsub_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: fmsub_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <8 x half> %c
  %0 = tail call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
  ret <8 x half> %0
}

define <2 x double> @fmsub_flipped_v2f64(<2 x double> %c, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: fmsub_flipped_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmls z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <2 x double> %c
  %0 = tail call <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %neg)
  ret <2 x double> %0
}

define <4 x float> @fmsub_flipped_v4f32(<4 x float> %c, <4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fmsub_flipped_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <4 x float> %c
  %0 = tail call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %neg)
  ret <4 x float> %0
}

define <8 x half> @fmsub_flipped_v8f16(<8 x half> %c, <8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: fmsub_flipped_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <8 x half> %c
  %0 = tail call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
  ret <8 x half> %0
}

define <vscale x 2 x double> @fnmsub_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fnmsub_nxv2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
entry:
  %neg = fneg <vscale x 2 x double> %a
  %neg1 = fneg <vscale x 2 x double> %c
  %0 = tail call <vscale x 2 x double> @llvm.fmuladd(<vscale x 2 x double> %neg, <vscale x 2 x double> %b, <vscale x 2 x double> %neg1)
  ret <vscale x 2 x double> %0
}

define <vscale x 4 x float> @fnmsub_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fnmsub_nxv4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
entry:
  %neg = fneg <vscale x 4 x float> %a
  %neg1 = fneg <vscale x 4 x float> %c
  %0 = tail call <vscale x 4 x float> @llvm.fmuladd(<vscale x 4 x float> %neg, <vscale x 4 x float> %b, <vscale x 4 x float> %neg1)
  ret <vscale x 4 x float> %0
}

define <vscale x 8 x half> @fnmsub_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fnmsub_nxv8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fnmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
entry:
  %neg = fneg <vscale x 8 x half> %a
  %neg1 = fneg <vscale x 8 x half> %c
  %0 = tail call <vscale x 8 x half> @llvm.fmuladd(<vscale x 8 x half> %neg, <vscale x 8 x half> %b, <vscale x 8 x half> %neg1)
  ret <vscale x 8 x half> %0
}

define <2 x double> @fnmsub_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fnmsub_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <2 x double> %a
  %neg1 = fneg <2 x double> %c
  %0 = tail call <2 x double> @llvm.fmuladd(<2 x double> %neg, <2 x double> %b, <2 x double> %neg1)
  ret <2 x double> %0
}

define <4 x float> @fnmsub_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fnmsub_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <4 x float> %a
  %neg1 = fneg <4 x float> %c
  %0 = tail call <4 x float> @llvm.fmuladd(<4 x float> %neg, <4 x float> %b, <4 x float> %neg1)
  ret <4 x float> %0
}

define <8 x half> @fnmsub_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: fnmsub_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <8 x half> %a
  %neg1 = fneg <8 x half> %c
  %0 = tail call <8 x half> @llvm.fmuladd(<8 x half> %neg, <8 x half> %b, <8 x half> %neg1)
  ret <8 x half> %0
}

define <2 x double> @fnmsub_flipped_v2f64(<2 x double> %c, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: fnmsub_flipped_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <2 x double> %a
  %neg1 = fneg <2 x double> %c
  %0 = tail call <2 x double> @llvm.fmuladd(<2 x double> %neg, <2 x double> %b, <2 x double> %neg1)
  ret <2 x double> %0
}

define <4 x float> @fnmsub_flipped_v4f32(<4 x float> %c, <4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: fnmsub_flipped_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <4 x float> %a
  %neg1 = fneg <4 x float> %c
  %0 = tail call <4 x float> @llvm.fmuladd(<4 x float> %neg, <4 x float> %b, <4 x float> %neg1)
  ret <4 x float> %0
}

define <8 x half> @fnmsub_flipped_v8f16(<8 x half> %c, <8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: fnmsub_flipped_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    fnmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
entry:
  %neg = fneg <8 x half> %a
  %neg1 = fneg <8 x half> %c
  %0 = tail call <8 x half> @llvm.fmuladd(<8 x half> %neg, <8 x half> %b, <8 x half> %neg1)
  ret <8 x half> %0
}
