Skip to content

Commit 0e86009

Browse files
committed
[AMDGPU][GlobalISel] Fix issue with copy_scc_vcc on gfx7
When selecting G_AMDGPU_COPY_SCC_VCC, we emit S_CMP_LG_U64 for wave64 and S_CMP_LG_U32 for wave32. However, gfx7 does not have the S_CMP_LG_U64 instruction. Work around this by using S_OR_B64 of the source register with itself instead, which sets SCC when the result is non-zero.
1 parent 1c837ec commit 0e86009

File tree

2 files changed

+54
-5
lines changed

2 files changed

+54
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -221,12 +221,23 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
221221
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
222222
const DebugLoc &DL = I.getDebugLoc();
223223
MachineBasicBlock *BB = I.getParent();
224+
Register VCCReg = I.getOperand(1).getReg();
225+
MachineInstr *Cmp;
226+
227+
if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
228+
unsigned CmpOpc = STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
229+
Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc))
230+
.addReg(VCCReg)
231+
.addImm(0);
232+
} else {
233+
// For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64
234+
// which sets SCC as a side effect.
235+
Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
236+
Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
237+
.addReg(VCCReg)
238+
.addReg(VCCReg);
239+
}
224240

225-
unsigned CmpOpc =
226-
STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
227-
MachineInstr *Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc))
228-
.addReg(I.getOperand(1).getReg())
229-
.addImm(0);
230241
if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI))
231242
return false;
232243

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s
5+
6+
---
7+
name: test_copy_scc_vcc
8+
legalized: true
9+
regBankSelected: true
10+
tracksRegLiveness: true
11+
body: |
12+
bb.0:
13+
14+
; GFX7-LABEL: name: test_copy_scc_vcc
15+
; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
16+
; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc
17+
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
18+
; GFX7-NEXT: $sgpr0 = COPY [[COPY]]
19+
; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0
20+
;
21+
; GFX8-LABEL: name: test_copy_scc_vcc
22+
; GFX8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
23+
; GFX8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc
24+
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
25+
; GFX8-NEXT: $sgpr0 = COPY [[COPY]]
26+
; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr0
27+
;
28+
; GFX11-LABEL: name: test_copy_scc_vcc
29+
; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
30+
; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc
31+
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
32+
; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
33+
; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0
34+
%0:vcc(s1) = G_IMPLICIT_DEF
35+
%1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0
36+
$sgpr0 = COPY %1
37+
S_ENDPGM 0, implicit $sgpr0
38+
...

0 commit comments

Comments
 (0)