Skip to content

Commit 47acd93

Browse files
committed
On newer targets, revert back to using s_cmp
1 parent 74a2bea commit 47acd93

File tree

4 files changed

+32
-17
lines changed

4 files changed

+32
-17
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -222,12 +222,20 @@ bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
222222
const DebugLoc &DL = I.getDebugLoc();
223223
MachineBasicBlock *BB = I.getParent();
224224
Register VCCReg = I.getOperand(1).getReg();
225+
MachineInstr *Cmp;
225226

226-
unsigned Opc = STI.isWave64() ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
227-
Register DeadDst = MRI->createVirtualRegister(
228-
STI.isWave64() ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass);
229-
MachineInstr *Cmp =
230-
BuildMI(*BB, &I, DL, TII.get(Opc), DeadDst).addReg(VCCReg).addReg(VCCReg);
227+
if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
228+
unsigned CmpOpc =
229+
STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
230+
Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0);
231+
} else {
232+
// For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64
233+
// which sets SCC to 1 when its result is non-zero.
234+
Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
235+
Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
236+
.addReg(VCCReg)
237+
.addReg(VCCReg);
238+
}
231239

232240
if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI))
233241
return false;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add
2929
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34
3030
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3131
; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0
32-
; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
32+
; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0
3333
; GFX8-NEXT: s_cselect_b32 s0, 1, 0
3434
; GFX8-NEXT: s_and_b32 s0, s0, 1
3535
; GFX8-NEXT: s_cmp_lg_u32 s0, 0
@@ -49,7 +49,7 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add
4949
; GFX11-NEXT: v_mov_b32_e32 v1, 0
5050
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5151
; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0
52-
; GFX11-NEXT: s_or_b32 s0, s0, s0
52+
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
5353
; GFX11-NEXT: s_cselect_b32 s0, 1, 0
5454
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
5555
; GFX11-NEXT: s_and_b32 s0, s0, 1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GF8 %s
44
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s
55

66
---
@@ -10,16 +10,23 @@ regBankSelected: true
1010
tracksRegLiveness: true
1111
body: |
1212
bb.0:
13-
; GFX-LABEL: name: test_copy_scc_vcc
14-
; GFX: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
15-
; GFX-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc
16-
; GFX-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
17-
; GFX-NEXT: $sgpr0 = COPY [[COPY]]
18-
; GFX-NEXT: S_ENDPGM 0, implicit $sgpr0
13+
; GFX7-LABEL: name: test_copy_scc_vcc
14+
; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
15+
; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc
16+
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
17+
; GFX7-NEXT: $sgpr0 = COPY [[COPY]]
18+
; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0
19+
;
20+
; GF8-LABEL: name: test_copy_scc_vcc
21+
; GF8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
22+
; GF8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc
23+
; GF8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
24+
; GF8-NEXT: $sgpr0 = COPY [[COPY]]
25+
; GF8-NEXT: S_ENDPGM 0, implicit $sgpr0
1926
;
2027
; GFX11-LABEL: name: test_copy_scc_vcc
2128
; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
22-
; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[DEF]], [[DEF]], implicit-def $scc
29+
; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc
2330
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
2431
; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
2532
; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, pt
210210
; NEW_RBS-LABEL: vcc_to_scc:
211211
; NEW_RBS: ; %bb.0:
212212
; NEW_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0
213-
; NEW_RBS-NEXT: s_or_b32 s0, s0, s0
213+
; NEW_RBS-NEXT: s_cmp_lg_u32 s0, 0
214214
; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0
215215
; NEW_RBS-NEXT: s_and_b32 s0, s0, 1
216216
; NEW_RBS-NEXT: s_cmp_lg_u32 s0, 0

0 commit comments

Comments
 (0)