Skip to content

Commit 9f992c2

Browse files
committed
AMDGPU/GlobalISel: Fix scc->vcc copy handling
The old code queried the register size using the previously computed size value as if it were the register number; that value happens to correspond to exec. Also stop assuming VCC is 64-bit, which fixes wave32. Also remove some untested handling for physical registers, which is skipped. This doesn't insert the V_CNDMASK_B32 if SCC is the physical copy source. I'm not sure whether this code should be trying to handle this special case instead of dealing with it in copyPhysReg. llvm-svn: 364761
1 parent 5dafcb9 commit 9f992c2

File tree

3 files changed

+111
-39
lines changed

3 files changed

+111
-39
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,7 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
6060
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
6161

6262
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
63-
if (Reg == AMDGPU::SCC)
64-
return true;
65-
66-
if (TargetRegisterInfo::isPhysicalRegister(Reg))
67-
return false;
63+
assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
6864

6965
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
7066
const TargetRegisterClass *RC =
@@ -77,6 +73,22 @@ static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
7773
return RB->getID() == AMDGPU::SCCRegBankID;
7874
}
7975

76+
static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI,
77+
const SIRegisterInfo &TRI) {
78+
assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
79+
80+
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
81+
const TargetRegisterClass *RC =
82+
RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
83+
if (RC) {
84+
return RC == TRI.getWaveMaskRegClass() &&
85+
MRI.getType(Reg).getSizeInBits() == 1;
86+
}
87+
88+
const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
89+
return RB->getID() == AMDGPU::VCCRegBankID;
90+
}
91+
8092
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
8193
MachineBasicBlock *BB = I.getParent();
8294
MachineFunction *MF = BB->getParent();
@@ -88,14 +100,12 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
88100
const MachineOperand &Src = I.getOperand(1);
89101
unsigned SrcReg = Src.getReg();
90102
if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
91-
unsigned DstReg = TRI.getRegSizeInBits(I.getOperand(0).getReg(), MRI);
92-
unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
103+
unsigned DstReg = I.getOperand(0).getReg();
93104

94-
// We have a copy from a 32-bit to 64-bit register. This happens
95-
// when we are selecting scc->vcc copies.
96-
if (DstSize == 64) {
105+
// Specially handle scc->vcc copies.
106+
if (isVCC(DstReg, MRI, TRI)) {
97107
const DebugLoc &DL = I.getDebugLoc();
98-
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), I.getOperand(0).getReg())
108+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
99109
.addImm(0)
100110
.addReg(SrcReg);
101111
if (!MRI.getRegClassOrNull(SrcReg))

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,8 +1690,8 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
16901690
case AMDGPU::VGPRRegBankID:
16911691
return &AMDGPU::VGPR_32RegClass;
16921692
case AMDGPU::VCCRegBankID:
1693-
// TODO: Check wavesize
1694-
return &AMDGPU::SReg_64_XEXECRegClass;
1693+
return isWave32 ?
1694+
&AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
16951695
case AMDGPU::SGPRRegBankID:
16961696
return &AMDGPU::SReg_32_XM0RegClass;
16971697
case AMDGPU::SCCRegBankID:
Lines changed: 88 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
2+
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel -o - %s | FileCheck -check-prefix=WAVE64 %s
3+
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -global-isel -o - %s | FileCheck -check-prefix=WAVE32 %s
34

45
---
56

@@ -11,33 +12,50 @@ regBankSelected: true
1112
body: |
1213
bb.0:
1314
liveins: $sgpr2_sgpr3
14-
; GCN-LABEL: name: copy
15-
; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
16-
; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
17-
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
18-
; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
15+
16+
; WAVE64-LABEL: name: copy
17+
; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
18+
; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
19+
; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
20+
; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
21+
; WAVE32-LABEL: name: copy
22+
; WAVE32: $vcc_hi = IMPLICIT_DEF
23+
; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
24+
; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
25+
; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
26+
; WAVE32: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
1927
%0:sgpr(p1) = COPY $sgpr2_sgpr3
2028
%1:vgpr(p1) = COPY %0
2129
%2:vgpr(s32) = G_IMPLICIT_DEF
2230
G_STORE %2, %1 :: (store 4, addrspace 1)
2331
...
2432
---
2533

26-
name: copy_vcc_scc
34+
name: copy_vcc_bank_scc_bank
2735
legalized: true
2836
regBankSelected: true
2937

3038
body: |
3139
bb.0:
3240
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
33-
; GCN-LABEL: name: copy_vcc_scc
34-
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
35-
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
36-
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
37-
; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
38-
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
39-
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
40-
; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
41+
42+
; WAVE64-LABEL: name: copy_vcc_bank_scc_bank
43+
; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
44+
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
45+
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
46+
; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
47+
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
48+
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
49+
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
50+
; WAVE32-LABEL: name: copy_vcc_bank_scc_bank
51+
; WAVE32: $vcc_hi = IMPLICIT_DEF
52+
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
53+
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
54+
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
55+
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
56+
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
57+
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
58+
; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
4159
%0:vgpr(p1) = COPY $vgpr0_vgpr1
4260
%1:vgpr(s32) = COPY $vgpr2
4361
%2:vgpr(s32) = COPY $vgpr3
@@ -48,23 +66,35 @@ body: |
4866
...
4967
---
5068

51-
name: copy_vcc_scc_2_uses
69+
name: copy_vcc_bank_scc_bank_2_uses
5270
legalized: true
5371
regBankSelected: true
5472

5573
body: |
5674
bb.0:
5775
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
58-
; GCN-LABEL: name: copy_vcc_scc_2_uses
59-
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
60-
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
61-
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
62-
; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
63-
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
64-
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
65-
; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
66-
; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
67-
; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
76+
77+
; WAVE64-LABEL: name: copy_vcc_bank_scc_bank_2_uses
78+
; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
79+
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
80+
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
81+
; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
82+
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
83+
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
84+
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
85+
; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
86+
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
87+
; WAVE32-LABEL: name: copy_vcc_bank_scc_bank_2_uses
88+
; WAVE32: $vcc_hi = IMPLICIT_DEF
89+
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
90+
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
91+
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
92+
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
93+
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
94+
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
95+
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
96+
; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
97+
; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
6898
%0:vgpr(p1) = COPY $vgpr0_vgpr1
6999
%1:vgpr(s32) = COPY $vgpr2
70100
%2:vgpr(s32) = COPY $vgpr3
@@ -75,4 +105,36 @@ body: |
75105
%7:vgpr(s32) = G_SELECT %6, %1, %5
76106
G_STORE %7, %0 :: (store 4, addrspace 1)
77107
...
108+
78109
---
110+
111+
name: copy_vcc_bank_scc_physreg
112+
legalized: true
113+
regBankSelected: true
114+
115+
body: |
116+
bb.0:
117+
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
118+
119+
; WAVE64-LABEL: name: copy_vcc_bank_scc_physreg
120+
; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
121+
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
122+
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
123+
; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc
124+
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
125+
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
126+
; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg
127+
; WAVE32: $vcc_hi = IMPLICIT_DEF
128+
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
129+
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
130+
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
131+
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
132+
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
133+
; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
134+
%0:vgpr(p1) = COPY $vgpr0_vgpr1
135+
%1:vgpr(s32) = COPY $vgpr2
136+
%2:vgpr(s32) = COPY $vgpr3
137+
%3:vcc(s1) = COPY $scc
138+
%5:vgpr(s32) = G_SELECT %3, %1, %2
139+
G_STORE %5, %0 :: (store 4, addrspace 1)
140+
...

0 commit comments

Comments
 (0)