Skip to content

Commit 5c1026d

Browse files
committed
use 32-bit ops
1 parent 1373ce2 commit 5c1026d

File tree

4 files changed

+180
-172
lines changed

4 files changed

+180
-172
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5909,7 +5909,10 @@ bool AMDGPULegalizerInfo::legalizePointerAsRsrcIntrin(
59095909

59105910
B.setInsertPt(B.getMBB(), ++B.getInsertPt());
59115911

5912+
auto ExtStride = B.buildAnyExt(S32, Stride);
5913+
59125914
if (ST.has45BitNumRecordsBufferResource()) {
5915+
Register Zero = B.buildConstant(S32, 0).getReg(0);
59135916
// Build the lower 64-bit value, which has a 57-bit base and the lower 7-bit
59145917
// num_records.
59155918
LLT PtrIntTy = LLT::scalar(MRI.getType(Pointer).getSizeInBits());
@@ -5921,13 +5924,15 @@ bool AMDGPULegalizerInfo::legalizePointerAsRsrcIntrin(
59215924
// Build the higher 64-bit value, which has the higher 38-bit num_records,
59225925
// 6-bit zero (omit), 16-bit stride and scale and 4-bit flag.
59235926
auto NumRecordsRHS = B.buildLShr(S64, NumRecords, B.buildConstant(S32, 7));
5924-
auto ExtStride = B.buildAnyExt(S64, Stride);
5925-
auto ShiftedStride = B.buildShl(S64, ExtStride, B.buildConstant(S32, 44));
5926-
auto ExtFlags = B.buildAnyExt(S64, Flags);
5927-
auto NewFlags = B.buildAnd(S64, ExtFlags, B.buildConstant(S64, 0x3));
5928-
auto ShiftedFlags = B.buildShl(S64, NewFlags, B.buildConstant(S32, 60));
5929-
auto CombinedFields = B.buildOr(S64, NumRecordsRHS, ShiftedStride);
5930-
Register HighHalf = B.buildOr(S64, CombinedFields, ShiftedFlags).getReg(0);
5927+
auto ShiftedStride = B.buildShl(S32, ExtStride, B.buildConstant(S32, 12));
5928+
auto ExtShiftedStride =
5929+
B.buildMergeValues(S64, {Zero, ShiftedStride.getReg(0)});
5930+
auto ShiftedFlags = B.buildShl(S32, Flags, B.buildConstant(S32, 28));
5931+
auto ExtShiftedFlags =
5932+
B.buildMergeValues(S64, {Zero, ShiftedFlags.getReg(0)});
5933+
auto CombinedFields = B.buildOr(S64, NumRecordsRHS, ExtShiftedStride);
5934+
Register HighHalf =
5935+
B.buildOr(S64, CombinedFields, ExtShiftedFlags).getReg(0);
59315936
B.buildMergeValues(Result, {LowHalf, HighHalf});
59325937
} else {
59335938
NumRecords = B.buildTrunc(S32, NumRecords).getReg(0);
@@ -5937,7 +5942,6 @@ bool AMDGPULegalizerInfo::legalizePointerAsRsrcIntrin(
59375942

59385943
auto AndMask = B.buildConstant(S32, 0x0000ffff);
59395944
auto Masked = B.buildAnd(S32, HighHalf, AndMask);
5940-
auto ExtStride = B.buildAnyExt(S32, Stride);
59415945
auto ShiftConst = B.buildConstant(S32, 16);
59425946
auto ShiftedStride = B.buildShl(S32, ExtStride, ShiftConst);
59435947
auto NewHighHalf = B.buildOr(S32, Masked, ShiftedStride);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11602,9 +11602,11 @@ SDValue SITargetLowering::lowerPointerAsRsrcIntrin(SDNode *Op,
1160211602
SDValue NumRecords = Op->getOperand(3);
1160311603
SDValue Flags = Op->getOperand(4);
1160411604

11605+
SDValue ExtStride = DAG.getAnyExtOrTrunc(Stride, Loc, MVT::i32);
1160511606
SDValue Rsrc;
1160611607

1160711608
if (Subtarget->has45BitNumRecordsBufferResource()) {
11609+
SDValue Zero = DAG.getConstant(0, Loc, MVT::i32);
1160811610
// Build the lower 64-bit value, which has a 57-bit base and the lower 7-bit
1160911611
// num_records.
1161011612
SDValue ExtPointer = DAG.getAnyExtOrTrunc(Pointer, Loc, MVT::i64);
@@ -11619,20 +11621,24 @@ SDValue SITargetLowering::lowerPointerAsRsrcIntrin(SDNode *Op,
1161911621
SDValue NumRecordsRHS =
1162011622
DAG.getNode(ISD::SRL, Loc, MVT::i64, NumRecords,
1162111623
DAG.getShiftAmountConstant(7, MVT::i32, Loc));
11622-
SDValue ExtStride = DAG.getAnyExtOrTrunc(Stride, Loc, MVT::i64);
1162311624
SDValue ShiftedStride =
11624-
DAG.getNode(ISD::SHL, Loc, MVT::i64, ExtStride,
11625-
DAG.getShiftAmountConstant(44, MVT::i32, Loc));
11626-
SDValue ExtFlags = DAG.getAnyExtOrTrunc(Flags, Loc, MVT::i64);
11627-
SDValue NewFlags = DAG.getNode(ISD::AND, Loc, MVT::i64, ExtFlags,
11628-
DAG.getConstant(0x3, Loc, MVT::i64));
11625+
DAG.getNode(ISD::SHL, Loc, MVT::i32, ExtStride,
11626+
DAG.getShiftAmountConstant(12, MVT::i32, Loc));
11627+
SDValue ExtShiftedStrideVec =
11628+
DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v2i32, Zero, ShiftedStride);
11629+
SDValue ExtShiftedStride =
11630+
DAG.getNode(ISD::BITCAST, Loc, MVT::i64, ExtShiftedStrideVec);
1162911631
SDValue ShiftedFlags =
11630-
DAG.getNode(ISD::SHL, Loc, MVT::i64, NewFlags,
11631-
DAG.getShiftAmountConstant(60, MVT::i32, Loc));
11632+
DAG.getNode(ISD::SHL, Loc, MVT::i32, Flags,
11633+
DAG.getShiftAmountConstant(28, MVT::i32, Loc));
11634+
SDValue ExtShiftedFlagsVec =
11635+
DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v2i32, Zero, ShiftedFlags);
11636+
SDValue ExtShiftedFlags =
11637+
DAG.getNode(ISD::BITCAST, Loc, MVT::i64, ExtShiftedFlagsVec);
1163211638
SDValue CombinedFields =
11633-
DAG.getNode(ISD::OR, Loc, MVT::i64, NumRecordsRHS, ShiftedStride);
11639+
DAG.getNode(ISD::OR, Loc, MVT::i64, NumRecordsRHS, ExtShiftedStride);
1163411640
SDValue HighHalf =
11635-
DAG.getNode(ISD::OR, Loc, MVT::i64, CombinedFields, ShiftedFlags);
11641+
DAG.getNode(ISD::OR, Loc, MVT::i64, CombinedFields, ExtShiftedFlags);
1163611642

1163711643
Rsrc = DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v2i64, LowHalf, HighHalf);
1163811644
} else {
@@ -11641,7 +11647,6 @@ SDValue SITargetLowering::lowerPointerAsRsrcIntrin(SDNode *Op,
1164111647
DAG.SplitScalar(Pointer, Loc, MVT::i32, MVT::i32);
1164211648
SDValue Mask = DAG.getConstant(0x0000ffff, Loc, MVT::i32);
1164311649
SDValue Masked = DAG.getNode(ISD::AND, Loc, MVT::i32, HighHalf, Mask);
11644-
SDValue ExtStride = DAG.getAnyExtOrTrunc(Stride, Loc, MVT::i32);
1164511650
SDValue ShiftedStride =
1164611651
DAG.getNode(ISD::SHL, Loc, MVT::i32, ExtStride,
1164711652
DAG.getShiftAmountConstant(16, MVT::i32, Loc));

0 commit comments

Comments
 (0)