Skip to content

Commit bc0af40

Browse files
authored
[AMDGPU] Prepare waterfall support for upstream changes (llvm#876)
An upstream change that restricts the readfirstlane destination to non-m0 registers causes issues for waterfall support. This commit adds the required changes before the upstream change is taken. See llvm#128851.
1 parent 2e6b808 commit bc0af40

File tree

3 files changed

+14
-9
lines changed

3 files changed

+14
-9
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaterfall.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,16 @@ static void readFirstLaneReg(MachineBasicBlock &MBB, MachineRegisterInfo *MRI,
123123
uint32_t RegSize = RI->getRegSizeInBits(*RFLRegRC) / 32;
124124
assert(RI->hasVGPRs(MRI->getRegClass(RFLSrcReg)) && "unexpected uniform operand for readfirstlane");
125125

126-
if (RegSize == 1)
126+
if (RegSize == 1) {
127+
MRI->constrainRegClass(RFLReg, &AMDGPU::SReg_32_XM0RegClass);
127128
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RFLReg)
128129
.addReg(RFLSrcReg, getUndefRegState(RFLSrcOp.isUndef()),
129130
RFLSrcOp.getSubReg());
131+
}
130132
else {
131133
SmallVector<Register, 8> TRegs;
132134
for (unsigned i = 0; i < RegSize; ++i) {
133-
Register TReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
135+
Register TReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
134136
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), TReg)
135137
.addReg(RFLSrcReg, 0, RI->getSubRegFromChannel(i));
136138
TRegs.push_back(TReg);
@@ -577,6 +579,9 @@ bool SIInsertWaterfall::processWaterfall(MachineBasicBlock &MBB) {
577579
CurrIdx.Index = TII->getNamedOperand(*(BeginMI), AMDGPU::OpName::idx);
578580
CurrIdx.IndexRC = RI->getRegClassForOperandReg(*MRI, *CurrIdx.Index);
579581
CurrIdx.IndexSRC = RI->getEquivalentSGPRClass(CurrIdx.IndexRC);
582+
if (CurrIdx.IndexSRC == &AMDGPU::SGPR_32RegClass)
583+
CurrIdx.IndexSRC = &AMDGPU::SReg_32_XM0RegClass;
584+
580585
IndexList.push_back(CurrIdx);
581586

582587
LLVM_DEBUG(if (RI->hasVGPRs(CurrIdx.IndexRC))

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1142,7 +1142,7 @@ def SI_WATERFALL_BEGIN_V2 : SI_WATERFALL_BEGIN<VReg_64>;
11421142
def SI_WATERFALL_BEGIN_V4 : SI_WATERFALL_BEGIN<VReg_128>;
11431143
def SI_WATERFALL_BEGIN_V8 : SI_WATERFALL_BEGIN<VReg_256>;
11441144

1145-
def SI_WATERFALL_READFIRSTLANE_V1 : SI_WATERFALL_READFIRSTLANE<SReg_32, VGPR_32>;
1145+
def SI_WATERFALL_READFIRSTLANE_V1 : SI_WATERFALL_READFIRSTLANE<SReg_32_XM0, VGPR_32>;
11461146
def SI_WATERFALL_READFIRSTLANE_V2 : SI_WATERFALL_READFIRSTLANE<SReg_64, VReg_64>;
11471147
def SI_WATERFALL_READFIRSTLANE_V4 : SI_WATERFALL_READFIRSTLANE<SGPR_128, VReg_128>;
11481148
def SI_WATERFALL_READFIRSTLANE_V8 : SI_WATERFALL_READFIRSTLANE<SReg_256, VReg_256>;

llvm/test/CodeGen/AMDGPU/si-insert-waterfall.mir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ body: |
1616
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1717
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1818
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1, [[COPY]], %subreg.sub2, [[COPY]], %subreg.sub3
19-
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
19+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
2020
; CHECK-NEXT: {{ $}}
2121
; CHECK-NEXT: .1:
2222
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -28,7 +28,7 @@ body: |
2828
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
2929
; CHECK-NEXT: {{ $}}
3030
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
31-
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
31+
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
3232
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY2]], implicit $exec
3333
; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
3434
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_]], %subreg.sub1
@@ -44,10 +44,10 @@ body: |
4444
%1:vgpr_32 = COPY $vgpr0
4545
%2:vgpr_32 = COPY $vgpr1
4646
%3:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %0:sgpr_32, %subreg.sub1, %0:sgpr_32, %subreg.sub2, %0:sgpr_32, %subreg.sub3
47-
%4:sreg_32 = S_MOV_B32 0
48-
%5:sreg_32 = SI_WATERFALL_BEGIN_V1 killed %4:sreg_32, %2:vgpr_32
49-
%6:sreg_32 = SI_WATERFALL_READFIRSTLANE_V1 %5:sreg_32, %2:vgpr_32
50-
%7:sreg_64 = REG_SEQUENCE %6:sreg_32, %subreg.sub0, %6:sreg_32, %subreg.sub1
47+
%4:sreg_32_xm0 = S_MOV_B32 0
48+
%5:sreg_32 = SI_WATERFALL_BEGIN_V1 killed %4:sreg_32_xm0, %2:vgpr_32
49+
%6:sreg_32_xm0 = SI_WATERFALL_READFIRSTLANE_V1 %5:sreg_32, %2:vgpr_32
50+
%7:sreg_64 = REG_SEQUENCE %6:sreg_32_xm0, %subreg.sub0, %6:sreg_32_xm0, %subreg.sub1
5151
%8:sgpr_256 = S_LOAD_DWORDX8_IMM killed %7:sreg_64, 0, 0
5252
%9:vreg_128 = IMAGE_SAMPLE_V4_V1_gfx10 %1:vgpr_32, killed %8:sgpr_256, killed %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
5353
%10:vreg_128 = SI_WATERFALL_END_V4 %5:sreg_32, killed %9:vreg_128

0 commit comments

Comments
 (0)