Skip to content

Commit 0426c2d

Browse files
committed
Reapply "AMDGPU: Cleanup and fix SMRD offset handling"
This reverts commit 6a4acb9.
1 parent ba1f3db commit 0426c2d

File tree

9 files changed

+306
-39
lines changed

9 files changed

+306
-39
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,26 +1771,31 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
17711771

17721772
SDLoc SL(ByteOffsetNode);
17731773
GCNSubtarget::Generation Gen = Subtarget->getGeneration();
1774-
int64_t ByteOffset = C->getSExtValue();
1775-
int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1776-
1777-
if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1778-
Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1774+
uint64_t ByteOffset = C->getZExtValue();
1775+
Optional<int64_t> EncodedOffset =
1776+
AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1777+
if (EncodedOffset) {
1778+
Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
17791779
Imm = true;
17801780
return true;
17811781
}
17821782

1783-
if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1783+
if (Gen == AMDGPUSubtarget::SEA_ISLANDS) {
1784+
EncodedOffset =
1785+
AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1786+
if (EncodedOffset) {
1787+
Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1788+
return true;
1789+
}
1790+
}
1791+
1792+
if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
17841793
return false;
17851794

1786-
if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1787-
// 32-bit Immediates are supported on Sea Islands.
1788-
Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1789-
} else {
1790-
SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1791-
Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1792-
C32Bit), 0);
1793-
}
1795+
SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1796+
Offset = SDValue(
1797+
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1798+
17941799
Imm = false;
17951800
return true;
17961801
}

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2107,15 +2107,14 @@ AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
21072107
return None;
21082108

21092109
const GEPInfo &GEPInfo = AddrInfo[0];
2110-
2111-
if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
2110+
Optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
2111+
if (!EncodedImm)
21122112
return None;
21132113

21142114
unsigned PtrReg = GEPInfo.SgprParts[0];
2115-
int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
21162115
return {{
21172116
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
2118-
[=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
2117+
[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
21192118
}};
21202119
}
21212120

@@ -2129,13 +2128,14 @@ AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
21292128

21302129
const GEPInfo &GEPInfo = AddrInfo[0];
21312130
unsigned PtrReg = GEPInfo.SgprParts[0];
2132-
int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
2133-
if (!isUInt<32>(EncodedImm))
2131+
Optional<int64_t> EncodedImm =
2132+
AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
2133+
if (!EncodedImm)
21342134
return None;
21352135

21362136
return {{
21372137
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
2138-
[=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
2138+
[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
21392139
}};
21402140
}
21412141

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
587587
16, 4);
588588
unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
589589
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
590-
unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
590+
unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
591591
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
592592
.addReg(Rsrc01)
593593
.addImm(EncodedOffset) // offset

llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
501501
: 4;
502502
break;
503503
case S_BUFFER_LOAD_IMM:
504-
EltSize = AMDGPU::getSMRDEncodedOffset(STM, 4);
504+
EltSize = AMDGPU::convertSMRDOffsetUnits(STM, 4);
505505
break;
506506
default:
507507
EltSize = 4;

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,16 +1247,43 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
12471247
return isGCN3Encoding(ST) || isGFX10(ST);
12481248
}
12491249

1250-
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
1250+
static bool isLegalSMRDEncodedImmOffset(const MCSubtargetInfo &ST,
1251+
int64_t EncodedOffset) {
1252+
return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
1253+
: isUInt<8>(EncodedOffset);
1254+
}
1255+
1256+
static bool isDwordAligned(uint64_t ByteOffset) {
1257+
return (ByteOffset & 3) == 0;
1258+
}
1259+
1260+
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
1261+
uint64_t ByteOffset) {
12511262
if (hasSMEMByteOffset(ST))
12521263
return ByteOffset;
1264+
1265+
assert(isDwordAligned(ByteOffset));
12531266
return ByteOffset >> 2;
12541267
}
12551268

1256-
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
1257-
int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
1258-
return (hasSMEMByteOffset(ST)) ?
1259-
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
1269+
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1270+
int64_t ByteOffset) {
1271+
if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
1272+
return None;
1273+
1274+
int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
1275+
return isLegalSMRDEncodedImmOffset(ST, EncodedOffset) ?
1276+
Optional<int64_t>(EncodedOffset) : None;
1277+
}
1278+
1279+
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1280+
int64_t ByteOffset) {
1281+
if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
1282+
return None;
1283+
1284+
assert(isCI(ST));
1285+
int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
1286+
return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
12601287
}
12611288

12621289
// Given Imm, split it into the values to put into the SOffset and ImmOffset

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -648,9 +648,19 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
648648

649649
bool isArgPassedInSGPR(const Argument *Arg);
650650

651-
/// \returns The encoding that will be used for \p ByteOffset in the SMRD
652-
/// offset field.
653-
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
651+
/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
652+
/// offsets.
653+
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
654+
655+
/// \returns The encoding that will be used for \p ByteOffset in the SMRD offset
656+
/// field, or None if it won't fit. This is useful on all subtargets.
657+
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
658+
int64_t ByteOffset);
659+
660+
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
661+
/// instruction. This is only useful on CI.
662+
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
663+
int64_t ByteOffset);
654664

655665
/// \returns true if this offset is small enough to fit in the SMRD
656666
/// offset field. \p ByteOffset should be the offset in bytes and

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -788,8 +788,9 @@ body: |
788788
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575
789789
; GFX7: liveins: $sgpr0_sgpr1
790790
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
791-
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262143, 0, 0 :: (load 4, addrspace 4)
792-
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
791+
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
792+
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
793+
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
793794
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575
794795
; GFX8: liveins: $sgpr0_sgpr1
795796
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
@@ -872,8 +873,9 @@ body: |
872873
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823
873874
; GFX7: liveins: $sgpr0_sgpr1
874875
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
875-
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 268435455, 0, 0 :: (load 4, addrspace 4)
876-
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
876+
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
877+
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
878+
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
877879
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823
878880
; GFX8: liveins: $sgpr0_sgpr1
879881
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1

llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,16 @@ done:
368368

369369
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
370370
; GCN: s_and_saveexec_b64
371-
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
372-
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
371+
; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
372+
; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
373373
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
374+
375+
; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
376+
; VI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
377+
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
378+
379+
; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffffff{{$}}
380+
374381
; GCN: s_or_b64 exec, exec
375382
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
376383
entry:

0 commit comments

Comments
 (0)