Skip to content

Commit f209d63

Browse files
authored
[SelectionDAGBuilder][PPC] Use getShiftAmountConstant. (#158400)
The PowerPC test changes are caused by shifts created by different IR operations now being CSE'd (merged by common-subexpression elimination). This allows consecutive loads to be turned into vector loads earlier, which in turn changes the ordering of other combines and legalizations. The net result is some codegen improvements and some regressions.
1 parent 40f2da5 commit f209d63

File tree

3 files changed

+43
-54
lines changed

3 files changed

+43
-54
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,9 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
223223
std::swap(Lo, Hi);
224224
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
225225
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
226-
Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
227-
DAG.getConstant(Lo.getValueSizeInBits(), DL,
228-
TLI.getShiftAmountTy(
229-
TotalVT, DAG.getDataLayout())));
226+
Hi = DAG.getNode(
227+
ISD::SHL, DL, TotalVT, Hi,
228+
DAG.getShiftAmountConstant(Lo.getValueSizeInBits(), TotalVT, DL));
230229
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
231230
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
232231
}
@@ -4469,9 +4468,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
44694468
if (ElementMul != 1) {
44704469
if (ElementMul.isPowerOf2()) {
44714470
unsigned Amt = ElementMul.logBase2();
4472-
IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
4473-
DAG.getConstant(Amt, dl, IdxN.getValueType()),
4474-
ScaleFlags);
4471+
IdxN = DAG.getNode(
4472+
ISD::SHL, dl, N.getValueType(), IdxN,
4473+
DAG.getShiftAmountConstant(Amt, N.getValueType(), dl),
4474+
ScaleFlags);
44754475
} else {
44764476
SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
44774477
IdxN.getValueType());
@@ -5460,10 +5460,8 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
54605460
const TargetLowering &TLI, const SDLoc &dl) {
54615461
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
54625462
DAG.getConstant(0x7f800000, dl, MVT::i32));
5463-
SDValue t1 = DAG.getNode(
5464-
ISD::SRL, dl, MVT::i32, t0,
5465-
DAG.getConstant(23, dl,
5466-
TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
5463+
SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
5464+
DAG.getShiftAmountConstant(23, MVT::i32, dl));
54675465
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
54685466
DAG.getConstant(127, dl, MVT::i32));
54695467
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
@@ -5488,11 +5486,8 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
54885486
SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
54895487

54905488
// IntegerPartOfX <<= 23;
5491-
IntegerPartOfX =
5492-
DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
5493-
DAG.getConstant(23, dl,
5494-
DAG.getTargetLoweringInfo().getShiftAmountTy(
5495-
MVT::i32, DAG.getDataLayout())));
5489+
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
5490+
DAG.getShiftAmountConstant(23, MVT::i32, dl));
54965491

54975492
SDValue TwoToFractionalPartOfX;
54985493
if (LimitFloatPrecision <= 6) {

llvm/test/CodeGen/PowerPC/build-vector-tests.ll

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,12 +1036,8 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
10361036
; P9LE: # %bb.0: # %entry
10371037
; P9LE-NEXT: sldi r4, r4, 2
10381038
; P9LE-NEXT: add r3, r3, r4
1039-
; P9LE-NEXT: li r4, -12
1040-
; P9LE-NEXT: lxvx v2, r3, r4
1041-
; P9LE-NEXT: addis r3, r2, .LCPI9_0@toc@ha
1042-
; P9LE-NEXT: addi r3, r3, .LCPI9_0@toc@l
1043-
; P9LE-NEXT: lxv vs0, 0(r3)
1044-
; P9LE-NEXT: xxperm v2, v2, vs0
1039+
; P9LE-NEXT: addi r3, r3, -12
1040+
; P9LE-NEXT: lxvw4x v2, 0, r3
10451041
; P9LE-NEXT: blr
10461042
;
10471043
; P8BE-LABEL: fromDiffMemVarDi:
@@ -1058,15 +1054,16 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
10581054
;
10591055
; P8LE-LABEL: fromDiffMemVarDi:
10601056
; P8LE: # %bb.0: # %entry
1061-
; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
10621057
; P8LE-NEXT: sldi r4, r4, 2
1063-
; P8LE-NEXT: addi r5, r5, .LCPI9_0@toc@l
10641058
; P8LE-NEXT: add r3, r3, r4
1065-
; P8LE-NEXT: lxvd2x vs0, 0, r5
10661059
; P8LE-NEXT: addi r3, r3, -12
1067-
; P8LE-NEXT: lxvd2x v3, 0, r3
1060+
; P8LE-NEXT: lxvd2x vs0, 0, r3
1061+
; P8LE-NEXT: addis r3, r2, .LCPI9_0@toc@ha
1062+
; P8LE-NEXT: addi r3, r3, .LCPI9_0@toc@l
10681063
; P8LE-NEXT: xxswapd v2, vs0
1069-
; P8LE-NEXT: vperm v2, v3, v3, v2
1064+
; P8LE-NEXT: lxvd2x vs0, 0, r3
1065+
; P8LE-NEXT: xxswapd v3, vs0
1066+
; P8LE-NEXT: vperm v2, v2, v2, v3
10701067
; P8LE-NEXT: blr
10711068
entry:
10721069
%idxprom = sext i32 %elem to i64
@@ -2524,12 +2521,8 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
25242521
; P9LE: # %bb.0: # %entry
25252522
; P9LE-NEXT: sldi r4, r4, 2
25262523
; P9LE-NEXT: add r3, r3, r4
2527-
; P9LE-NEXT: li r4, -12
2528-
; P9LE-NEXT: lxvx v2, r3, r4
2529-
; P9LE-NEXT: addis r3, r2, .LCPI41_0@toc@ha
2530-
; P9LE-NEXT: addi r3, r3, .LCPI41_0@toc@l
2531-
; P9LE-NEXT: lxv vs0, 0(r3)
2532-
; P9LE-NEXT: xxperm v2, v2, vs0
2524+
; P9LE-NEXT: addi r3, r3, -12
2525+
; P9LE-NEXT: lxvw4x v2, 0, r3
25332526
; P9LE-NEXT: blr
25342527
;
25352528
; P8BE-LABEL: fromDiffMemVarDui:
@@ -2546,15 +2539,16 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
25462539
;
25472540
; P8LE-LABEL: fromDiffMemVarDui:
25482541
; P8LE: # %bb.0: # %entry
2549-
; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
25502542
; P8LE-NEXT: sldi r4, r4, 2
2551-
; P8LE-NEXT: addi r5, r5, .LCPI41_0@toc@l
25522543
; P8LE-NEXT: add r3, r3, r4
2553-
; P8LE-NEXT: lxvd2x vs0, 0, r5
25542544
; P8LE-NEXT: addi r3, r3, -12
2555-
; P8LE-NEXT: lxvd2x v3, 0, r3
2545+
; P8LE-NEXT: lxvd2x vs0, 0, r3
2546+
; P8LE-NEXT: addis r3, r2, .LCPI41_0@toc@ha
2547+
; P8LE-NEXT: addi r3, r3, .LCPI41_0@toc@l
25562548
; P8LE-NEXT: xxswapd v2, vs0
2557-
; P8LE-NEXT: vperm v2, v3, v3, v2
2549+
; P8LE-NEXT: lxvd2x vs0, 0, r3
2550+
; P8LE-NEXT: xxswapd v3, vs0
2551+
; P8LE-NEXT: vperm v2, v2, v2, v3
25582552
; P8LE-NEXT: blr
25592553
entry:
25602554
%idxprom = sext i32 %elem to i64

llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -394,18 +394,18 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
394394
; CHECK-NEXT: xxsetaccz acc2
395395
; CHECK-NEXT: xxsetaccz acc1
396396
; CHECK-NEXT: addi r6, r6, 6
397+
; CHECK-NEXT: add r8, r5, r7
397398
; CHECK-NEXT: lxvx vs0, r5, r7
398-
; CHECK-NEXT: add r7, r5, r7
399-
; CHECK-NEXT: lxv vs1, 16(r7)
399+
; CHECK-NEXT: rldic r7, r4, 6, 26
400+
; CHECK-NEXT: addi r4, r4, 3
401+
; CHECK-NEXT: lxv vs1, 16(r8)
400402
; CHECK-NEXT: xvf32gerpp acc2, vs0, vs1
401-
; CHECK-NEXT: lxv vs0, 32(r7)
402-
; CHECK-NEXT: lxv vs1, 48(r7)
403+
; CHECK-NEXT: lxv vs0, 32(r8)
404+
; CHECK-NEXT: lxv vs1, 48(r8)
403405
; CHECK-NEXT: xvf32gerpn acc1, vs0, vs1
404-
; CHECK-NEXT: lxv vs12, 64(r7)
405-
; CHECK-NEXT: lxv vs13, 80(r7)
406+
; CHECK-NEXT: lxv vs12, 64(r8)
407+
; CHECK-NEXT: lxv vs13, 80(r8)
406408
; CHECK-NEXT: xxsetaccz acc0
407-
; CHECK-NEXT: rldic r7, r4, 6, 26
408-
; CHECK-NEXT: addi r4, r4, 3
409409
; CHECK-NEXT: add r8, r3, r7
410410
; CHECK-NEXT: xxmfacc acc2
411411
; CHECK-NEXT: xvf32gernp acc0, vs12, vs13
@@ -443,18 +443,18 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
443443
; CHECK-BE-NEXT: xxsetaccz acc2
444444
; CHECK-BE-NEXT: xxsetaccz acc1
445445
; CHECK-BE-NEXT: addi r6, r6, 6
446+
; CHECK-BE-NEXT: add r8, r5, r7
446447
; CHECK-BE-NEXT: lxvx vs0, r5, r7
447-
; CHECK-BE-NEXT: add r7, r5, r7
448-
; CHECK-BE-NEXT: lxv vs1, 16(r7)
448+
; CHECK-BE-NEXT: rldic r7, r4, 6, 26
449+
; CHECK-BE-NEXT: addi r4, r4, 3
450+
; CHECK-BE-NEXT: lxv vs1, 16(r8)
449451
; CHECK-BE-NEXT: xvf32gerpp acc2, vs0, vs1
450-
; CHECK-BE-NEXT: lxv vs0, 32(r7)
451-
; CHECK-BE-NEXT: lxv vs1, 48(r7)
452+
; CHECK-BE-NEXT: lxv vs0, 32(r8)
453+
; CHECK-BE-NEXT: lxv vs1, 48(r8)
452454
; CHECK-BE-NEXT: xvf32gerpn acc1, vs0, vs1
453-
; CHECK-BE-NEXT: lxv vs12, 64(r7)
454-
; CHECK-BE-NEXT: lxv vs13, 80(r7)
455+
; CHECK-BE-NEXT: lxv vs12, 64(r8)
456+
; CHECK-BE-NEXT: lxv vs13, 80(r8)
455457
; CHECK-BE-NEXT: xxsetaccz acc0
456-
; CHECK-BE-NEXT: rldic r7, r4, 6, 26
457-
; CHECK-BE-NEXT: addi r4, r4, 3
458458
; CHECK-BE-NEXT: add r8, r3, r7
459459
; CHECK-BE-NEXT: xxmfacc acc2
460460
; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13

0 commit comments

Comments
 (0)