Skip to content

Commit 9cd5327

Browse files
committed
fixup! fixup! [ISel] Commute FMUL and inserting zero into vector lane
1 parent 24c4f36 commit 9cd5327

File tree

2 files changed

+33
-9
lines changed

2 files changed

+33
-9
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26494,9 +26494,16 @@ static SDValue commuteInsertVectorEltFMul(SDNode *N, SelectionDAG &DAG) {
2649426494
// Only handle constant 0 insertion...
2649526495
if (!(isNullConstant(InsertVal) || isNullFPConstant(InsertVal)))
2649626496
return SDValue();
26497-
// ... into the result of an FMUL.
26497+
// ... into the result of an FMUL ...
2649826498
if (InsertVec.getOpcode() != ISD::FMUL)
2649926499
return SDValue();
26500+
// ... and only when x * 0 = 0, i.e. no NaNs, no infinities and no signed zeros.
26501+
auto Flags = InsertVec->getFlags();
26502+
auto Options = DAG.getTarget().Options;
26503+
if ((!Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
26504+
(!Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
26505+
(!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()))
26506+
return SDValue();
2650026507

2650126508
// Insert into the operand of FMUL instead.
2650226509
SDValue FMulOp = InsertVec.getOperand(0);
@@ -26518,6 +26525,7 @@ static SDValue commuteInsertVectorEltFMul(SDNode *N, SelectionDAG &DAG) {
2651826525
FMulOp2 = InsertOp;
2651926526
SDValue FMul = DAG.getNode(ISD::FMUL, SDLoc(InsertVec),
2652026527
InsertVec.getValueType(), InsertOp, FMulOp2);
26528+
FMul->setFlags(Flags);
2652126529
DAG.ReplaceAllUsesWith(N, &FMul);
2652226530
return FMul;
2652326531
}

llvm/test/CodeGen/AArch64/arm64-vmul.ll

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,51 +1189,67 @@ define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
11891189
define <4 x float> @fmul_insert_zero(<4 x float> %A, <4 x float> %B) {
11901190
; CHECK-LABEL: fmul_insert_zero:
11911191
; CHECK: // %bb.0:
1192-
; CHECK-NEXT: mov.s v0[3], wzr
1192+
; CHECK-NEXT: movi d2, #0000000000000000
1193+
; CHECK-NEXT: mov.s v0[3], v2[0]
11931194
; CHECK-NEXT: fmul.4s v0, v0, v1
11941195
; CHECK-NEXT: ret
1195-
%mul = fmul <4 x float> %A, %B
1196+
%mul = fmul fast <4 x float> %A, %B
11961197
%mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
11971198
ret <4 x float> %mul_set_lane
11981199
}
11991200

12001201
define <4 x float> @fmul_insert_zero_same(<4 x float> %A) {
12011202
; CHECK-LABEL: fmul_insert_zero_same:
12021203
; CHECK: // %bb.0:
1203-
; CHECK-NEXT: mov.s v0[3], wzr
1204+
; CHECK-NEXT: movi d1, #0000000000000000
1205+
; CHECK-NEXT: mov.s v0[3], v1[0]
12041206
; CHECK-NEXT: fmul.4s v0, v0, v0
12051207
; CHECK-NEXT: ret
1206-
%mul = fmul <4 x float> %A, %A
1208+
%mul = fmul fast <4 x float> %A, %A
12071209
%mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
12081210
ret <4 x float> %mul_set_lane
12091211
}
12101212

12111213
define <4 x float> @fmul_insert_zero1(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
12121214
; CHECK-LABEL: fmul_insert_zero1:
12131215
; CHECK: // %bb.0:
1216+
; CHECK-NEXT: movi d3, #0000000000000000
12141217
; CHECK-NEXT: fsub.4s v0, v2, v0
1215-
; CHECK-NEXT: mov.s v1[3], wzr
1218+
; CHECK-NEXT: mov.s v1[3], v3[0]
12161219
; CHECK-NEXT: fmul.4s v0, v1, v0
12171220
; CHECK-NEXT: ret
12181221
%sub = fsub <4 x float> %C, %A
1219-
%mul = fmul <4 x float> %B, %sub
1222+
%mul = fmul fast <4 x float> %B, %sub
12201223
%mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
12211224
ret <4 x float> %mul_set_lane
12221225
}
12231226

12241227
define <4 x float> @fmul_insert_zero2(<4 x float> %A, <4 x float> %B) {
12251228
; CHECK-LABEL: fmul_insert_zero2:
12261229
; CHECK: // %bb.0:
1227-
; CHECK-NEXT: mov.s v0[3], wzr
1230+
; CHECK-NEXT: movi d2, #0000000000000000
1231+
; CHECK-NEXT: mov.s v0[3], v2[0]
12281232
; CHECK-NEXT: fmul.4s v0, v0, v1
12291233
; CHECK-NEXT: fsub.4s v0, v1, v0
12301234
; CHECK-NEXT: ret
1231-
%mul = fmul <4 x float> %B, %A
1235+
%mul = fmul fast <4 x float> %B, %A
12321236
%mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
12331237
%sub = fsub <4 x float> %B, %mul_set_lane
12341238
ret <4 x float> %sub
12351239
}
12361240

1241+
define <4 x float> @fmul_insert_zero_nofast(<4 x float> %A, <4 x float> %B) {
1242+
; CHECK-LABEL: fmul_insert_zero_nofast:
1243+
; CHECK: // %bb.0:
1244+
; CHECK-NEXT: movi d2, #0000000000000000
1245+
; CHECK-NEXT: fmul.4s v0, v0, v1
1246+
; CHECK-NEXT: mov.s v0[3], v2[0]
1247+
; CHECK-NEXT: ret
1248+
%mul = fmul <4 x float> %A, %B
1249+
%mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
1250+
ret <4 x float> %mul_set_lane
1251+
}
1252+
12371253
define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
12381254
; CHECK-LABEL: fmulx_lane_2s:
12391255
; CHECK: // %bb.0:

0 commit comments

Comments
 (0)