From 0e860099930669d924386c67aa25fcd7acc31887 Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Tue, 28 Oct 2025 00:58:17 -0700 Subject: [PATCH 1/4] [AMDGPU][GlobalISel] Fix issue with copy_scc_vcc on gfx7 When selecting for G_AMDGPU_COPY_SCC_VCC, we use S_CMP_LG_U64 or S_CMP_LG_U32 for wave64 and wave32 respectively. However, on gfx7 we do not have the S_CMP_LG_U64 instruction. Work around this issue by using S_OR_B64 instead. --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 21 +++++++--- .../GlobalISel/inst-select-copy-scc-vcc.mir | 38 +++++++++++++++++++ 2 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 97c2c9c5316b3..1066601705d48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -221,12 +221,23 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); + Register VCCReg = I.getOperand(1).getReg(); + MachineInstr *Cmp; + + if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + unsigned CmpOpc = STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; + Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)) + .addReg(VCCReg) + .addImm(0); + } else { + // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64 + // which sets SCC as a side effect. + Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst) + .addReg(VCCReg) + .addReg(VCCReg); + } - unsigned CmpOpc = - STI.isWave64() ? 
AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; - MachineInstr *Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)) - .addReg(I.getOperand(1).getReg()) - .addImm(0); if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI)) return false; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir new file mode 100644 index 0000000000000..8b5fc565bdc1c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir @@ -0,0 +1,38 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s + +--- +name: test_copy_scc_vcc +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + + ; GFX7-LABEL: name: test_copy_scc_vcc + ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX7-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GFX8-LABEL: name: test_copy_scc_vcc + ; GFX8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX8-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GFX11-LABEL: name: test_copy_scc_vcc + ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] + 
; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0 + %0:vcc(s1) = G_IMPLICIT_DEF + %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0 + $sgpr0 = COPY %1 + S_ENDPGM 0, implicit $sgpr0 +... From bbae7b35ddc5fadf73647eb68ede7cd61eb76d33 Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Tue, 28 Oct 2025 01:09:23 -0700 Subject: [PATCH 2/4] Fix formatting --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 1066601705d48..dec6f427223ba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -225,10 +225,9 @@ bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const { MachineInstr *Cmp; if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - unsigned CmpOpc = STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; - Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)) - .addReg(VCCReg) - .addImm(0); + unsigned CmpOpc = + STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; + Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0); } else { // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64 // which sets SCC as a side effect. From 74a2beacdd7ef00f61a3d1d26e522de487e397ed Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Tue, 28 Oct 2025 10:51:11 -0700 Subject: [PATCH 3/4] Add ll test and change to OR instruction on newer targets also. 
--- .../AMDGPU/AMDGPUInstructionSelector.cpp | 18 ++--- .../GlobalISel/inst-select-copy-scc-vcc.ll | 66 +++++++++++++++++++ .../GlobalISel/inst-select-copy-scc-vcc.mir | 28 +++----- .../AMDGPU/GlobalISel/regbankselect-mui.ll | 2 +- 4 files changed, 82 insertions(+), 32 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index dec6f427223ba..9f9cd13d09cc5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -222,20 +222,12 @@ bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); Register VCCReg = I.getOperand(1).getReg(); - MachineInstr *Cmp; - if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - unsigned CmpOpc = - STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; - Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0); - } else { - // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64 - // which sets SCC as a side effect. - Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); - Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst) - .addReg(VCCReg) - .addReg(VCCReg); - } + unsigned Opc = STI.isWave64() ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32; + Register DeadDst = MRI->createVirtualRegister( + STI.isWave64() ? 
&AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass); + MachineInstr *Cmp = + BuildMI(*BB, &I, DL, TII.get(Opc), DeadDst).addReg(VCCReg).addReg(VCCReg); if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI)) return false; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll new file mode 100644 index 0000000000000..f914fbc7ff170 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s + +define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) { +; GFX7-LABEL: fcmp_uniform_select: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9 +; GFX7-NEXT: s_load_dword s3, s[4:5], 0xb +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_f32_e64 s[4:5], s6, 0 +; GFX7-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7-NEXT: s_cselect_b32 s4, 1, 0 +; GFX7-NEXT: s_and_b32 s4, s4, 1 +; GFX7-NEXT: s_cmp_lg_u32 s4, 0 +; GFX7-NEXT: s_cselect_b32 s3, s7, s3 +; GFX7-NEXT: v_mov_b32_e32 v0, s3 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: fcmp_uniform_select: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34 +; GFX8-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0 +; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX8-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8-NEXT: s_and_b32 s0, s0, 1 +; GFX8-NEXT: s_cmp_lg_u32 s0, 0 +; GFX8-NEXT: s_cselect_b32 s0, s1, s6 +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX11-LABEL: fcmp_uniform_select: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c +; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 +; GFX11-NEXT: s_or_b32 s0, s0, s0 +; GFX11-NEXT: s_cselect_b32 s0, 1, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s0, s0, 1 +; GFX11-NEXT: s_cmp_lg_u32 s0, 0 +; GFX11-NEXT: s_cselect_b32 s0, s1, s6 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_endpgm + %cmp = fcmp oeq float %a, 0.0 + %sel = select i1 %cmp, i32 %b, i32 %c + store i32 %sel, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir index 8b5fc565bdc1c..9a73b01f726cc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck 
-check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s --- name: test_copy_scc_vcc @@ -10,24 +10,16 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - - ; GFX7-LABEL: name: test_copy_scc_vcc - ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX7-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0 - ; - ; GFX8-LABEL: name: test_copy_scc_vcc - ; GFX8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GFX8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; GFX-LABEL: name: test_copy_scc_vcc + ; GFX: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc + ; GFX-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX-NEXT: S_ENDPGM 0, implicit $sgpr0 ; ; GFX11-LABEL: name: test_copy_scc_vcc ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF - ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc + ; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[DEF]], [[DEF]], implicit-def $scc ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll index 5240bf4f3a1d7..f82b9301e290c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll @@ -210,7 +210,7 @@ define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, pt ; NEW_RBS-LABEL: vcc_to_scc: ; NEW_RBS: ; %bb.0: ; NEW_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 -; NEW_RBS-NEXT: s_cmp_lg_u32 s0, 0 +; NEW_RBS-NEXT: s_or_b32 s0, s0, s0 ; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0 ; NEW_RBS-NEXT: s_and_b32 s0, s0, 1 ; NEW_RBS-NEXT: s_cmp_lg_u32 s0, 0 From 47acd93e3bd72f1e868adb72af170704c0f0f67b Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Wed, 29 Oct 2025 13:15:38 -0700 Subject: [PATCH 4/4] On newer targets, revert back to using s_cmp --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 18 +++++++++---- .../GlobalISel/inst-select-copy-scc-vcc.ll | 4 +-- .../GlobalISel/inst-select-copy-scc-vcc.mir | 25 ++++++++++++------- .../AMDGPU/GlobalISel/regbankselect-mui.ll | 2 +- 4 files changed, 32 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 9f9cd13d09cc5..dec6f427223ba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -222,12 +222,20 @@ bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); Register VCCReg = I.getOperand(1).getReg(); + MachineInstr *Cmp; - unsigned Opc = STI.isWave64() ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32; - Register DeadDst = MRI->createVirtualRegister( - STI.isWave64() ? 
&AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass); - MachineInstr *Cmp = - BuildMI(*BB, &I, DL, TII.get(Opc), DeadDst).addReg(VCCReg).addReg(VCCReg); + if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + unsigned CmpOpc = + STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; + Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0); + } else { + // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64 + // which sets SCC as a side effect. + Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst) + .addReg(VCCReg) + .addReg(VCCReg); + } if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI)) return false; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll index f914fbc7ff170..1a7ccf0835686 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll @@ -29,7 +29,7 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0 -; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0 ; GFX8-NEXT: s_cselect_b32 s0, 1, 0 ; GFX8-NEXT: s_and_b32 s0, s0, 1 ; GFX8-NEXT: s_cmp_lg_u32 s0, 0 @@ -49,7 +49,7 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 -; GFX11-NEXT: s_or_b32 s0, s0, s0 +; GFX11-NEXT: s_cmp_lg_u32 s0, 0 ; GFX11-NEXT: s_cselect_b32 s0, 1, 0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, s0, 1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir index 9a73b01f726cc..67cc0169af619 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX8 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s --- @@ -10,16 +10,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - ; GFX-LABEL: name: test_copy_scc_vcc - ; GFX: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GFX-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc - ; GFX-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; GFX7-LABEL: name: test_copy_scc_vcc + ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX7-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GFX8-LABEL: name: test_copy_scc_vcc + ; GFX8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc + ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX8-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr0 ; ; GFX11-LABEL: name: test_copy_scc_vcc ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF - ; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[DEF]], [[DEF]], implicit-def $scc + ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll index f82b9301e290c..5240bf4f3a1d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll @@ -210,7 +210,7 @@ define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, pt ; NEW_RBS-LABEL: vcc_to_scc: ; NEW_RBS: ; %bb.0: ; NEW_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 -; NEW_RBS-NEXT: s_or_b32 s0, s0, s0 +; NEW_RBS-NEXT: s_cmp_lg_u32 s0, 0 ; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0 ; NEW_RBS-NEXT: s_and_b32 s0, s0, 1 ; NEW_RBS-NEXT: s_cmp_lg_u32 s0, 0