Skip to content

Commit e3b38a9

Browse files
committed
fixup! [ISel] Commute FMUL and inserting zero into vector lane
1 parent e08422d commit e3b38a9

File tree

2 files changed

+48
-4
lines changed

2 files changed

+48
-4
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -26283,16 +26283,24 @@ static SDValue commuteInsertVectorEltFMul(SDNode *N, SelectionDAG &DAG) {
2628326283

2628426284
// Insert into the operand of FMUL instead.
2628526285
SDValue FMulOp = InsertVec.getOperand(0);
26286+
SDValue FMulOp2 = InsertVec.getOperand(1);
2628626287

26287-
if (!InsertVec.hasOneUse() || !FMulOp.hasOneUse())
26288+
if (!InsertVec.hasOneUse())
2628826289
return SDValue();
2628926290

26291+
if (!InsertVec->isOnlyUserOf(FMulOp.getNode())) {
26292+
if (!InsertVec->isOnlyUserOf(FMulOp2.getNode()))
26293+
return SDValue();
26294+
std::swap(FMulOp, FMulOp2);
26295+
}
26296+
2629026297
SDValue InsertOp =
2629126298
DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), FMulOp.getValueType(),
2629226299
FMulOp, InsertVal, InsertIdx);
26293-
SDValue FMul =
26294-
DAG.getNode(ISD::FMUL, SDLoc(InsertVec), InsertVec.getValueType(),
26295-
InsertOp, InsertVec.getOperand(1));
26300+
if (FMulOp == FMulOp2)
26301+
FMulOp2 = InsertOp;
26302+
SDValue FMul = DAG.getNode(ISD::FMUL, SDLoc(InsertVec),
26303+
InsertVec.getValueType(), InsertOp, FMulOp2);
2629626304
DAG.ReplaceAllUsesWith(N, &FMul);
2629726305
return FMul;
2629826306
}

llvm/test/CodeGen/AArch64/arm64-vmul.ll

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1197,6 +1197,42 @@ define <4 x float> @fmul_insert_zero(<4 x float> %A, <4 x float> %B) {
11971197
ret <4 x float> %mul_set_lane
11981198
}
11991199

1200+
define <4 x float> @fmul_insert_zero_same(<4 x float> %A) {
1201+
; CHECK-LABEL: fmul_insert_zero_same:
1202+
; CHECK: // %bb.0:
1203+
; CHECK-NEXT: mov.s v0[3], wzr
1204+
; CHECK-NEXT: fmul.4s v0, v0, v0
1205+
; CHECK-NEXT: ret
1206+
%mul = fmul <4 x float> %A, %A
1207+
%mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
1208+
ret <4 x float> %mul_set_lane
1209+
}
1210+
1211+
define <4 x float> @fmul_insert_zero1(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
1212+
; CHECK-LABEL: fmul_insert_zero1:
1213+
; CHECK: // %bb.0:
1214+
; CHECK-NEXT: fsub.4s v0, v2, v0
1215+
; CHECK-NEXT: mov.s v1[3], wzr
1216+
; CHECK-NEXT: fmul.4s v0, v1, v0
1217+
; CHECK-NEXT: ret
1218+
%sub = fsub <4 x float> %C, %A
1219+
%mul = fmul <4 x float> %B, %sub
1220+
%mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
1221+
ret <4 x float> %mul_set_lane
1222+
}
1223+
1224+
define <4 x float> @fmul_insert_zero2(<4 x float> %A, <4 x float> %B) {
1225+
; CHECK-LABEL: fmul_insert_zero2:
1226+
; CHECK: // %bb.0:
1227+
; CHECK-NEXT: mov.s v0[3], wzr
1228+
; CHECK-NEXT: fmul.4s v0, v0, v1
1229+
; CHECK-NEXT: fsub.4s v0, v1, v0
1230+
; CHECK-NEXT: ret
1231+
%mul = fmul <4 x float> %B, %A
1232+
%mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
1233+
%sub = fsub <4 x float> %B, %mul_set_lane
1234+
ret <4 x float> %sub
1235+
}
12001236

12011237
define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
12021238
; CHECK-LABEL: fmulx_lane_2s:

0 commit comments

Comments
 (0)