Skip to content

Commit 35c710f

Browse files
Flakebi authored and memfrob committed
[AMDGPU] Mark waterfall loops as SI_WATERFALL_LOOP
This way, they can be detected later, e.g. by the SIOptimizeVGPRLiveRange pass.

Differential Revision: https://reviews.llvm.org/D105467
1 parent f41d626 commit 35c710f

File tree

4 files changed

+22
-9
lines changed

4 files changed

+22
-9
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5278,7 +5278,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
52785278
.addReg(Exec)
52795279
.addReg(SaveExec);
52805280

5281-
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
5281+
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB);
52825282
}
52835283

52845284
// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -318,6 +318,14 @@ def SI_ELSE : CFPseudoInstSI <
318318
let hasSideEffects = 1;
319319
}
320320

321+
def SI_WATERFALL_LOOP : CFPseudoInstSI <
322+
(outs),
323+
(ins brtarget:$target), [], 1> {
324+
let Size = 8;
325+
let isBranch = 1;
326+
let Defs = [];
327+
}
328+
321329
def SI_LOOP : CFPseudoInstSI <
322330
(outs), (ins SReg_1:$saved, brtarget:$target),
323331
[(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -600,6 +600,10 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
600600
emitLoop(MI);
601601
break;
602602

603+
case AMDGPU::SI_WATERFALL_LOOP:
604+
MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ));
605+
break;
606+
603607
case AMDGPU::SI_END_CF:
604608
SplitBB = emitEndCf(MI);
605609
break;
@@ -840,6 +844,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
840844
case AMDGPU::SI_IF:
841845
case AMDGPU::SI_ELSE:
842846
case AMDGPU::SI_IF_BREAK:
847+
case AMDGPU::SI_WATERFALL_LOOP:
843848
case AMDGPU::SI_LOOP:
844849
case AMDGPU::SI_END_CF:
845850
SplitMBB = process(MI);

llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
3131
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
3232
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
33-
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
33+
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
3434
# W64-LABEL: bb.2:
3535
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
3636

@@ -55,7 +55,7 @@
5555
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
5656
# TODO: S_XOR_B32_term should be `implicit-def $scc`
5757
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
58-
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
58+
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
5959
# W32-LABEL: bb.2:
6060
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
6161
---
@@ -103,7 +103,7 @@ body: |
103103
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
104104
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
105105
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
106-
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
106+
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
107107
# W64-LABEL: bb.2:
108108
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
109109

@@ -128,7 +128,7 @@ body: |
128128
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
129129
# TODO: S_XOR_B32_term should be `implicit-def $scc`
130130
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
131-
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
131+
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
132132
# W32-LABEL: bb.2:
133133
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
134134
---
@@ -176,7 +176,7 @@ body: |
176176
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
177177
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
178178
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
179-
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
179+
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
180180
# W64-LABEL: bb.2:
181181
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
182182

@@ -201,7 +201,7 @@ body: |
201201
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
202202
# TODO: S_XOR_B32_term should be `implicit-def $scc`
203203
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
204-
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
204+
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
205205
# W32-LABEL: bb.2:
206206
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
207207
---
@@ -286,7 +286,7 @@ body: |
286286
# W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
287287
# W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
288288
# W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
289-
# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
289+
# W64-NO-ADDR64: SI_WATERFALL_LOOP %bb.1, implicit $exec
290290
# W64-NO-ADDR64-LABEL: bb.2:
291291
# W64-NO-ADDR64: $exec = S_MOV_B64 [[SAVEEXEC]]
292292

@@ -309,7 +309,7 @@ body: |
309309
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
310310
# TODO: S_XOR_B32_term should be `implicit-def $scc`
311311
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
312-
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
312+
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
313313
# W32-LABEL: bb.2:
314314
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
315315

0 commit comments

Comments
 (0)