Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,37 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
}
}

// Try to use S_CMOVK_I32 in place of S_CSELECT_B32.
//
//   S_CSELECT_B32 dst, src0, src1   computes  dst = SCC ? src0   : src1
//   S_CMOVK_I32   dst, simm16       computes  dst = SCC ? simm16 : dst
//
// The two are only equivalent when the K-immediate is the *first* source (the
// value selected when SCC is set) and the *second* source is the destination
// register itself (the value kept when SCC is clear). Matching the opposite
// operand order would silently invert the select.
if (MI.getOpcode() == AMDGPU::S_CSELECT_B32) {
  const MachineOperand &Dest = MI.getOperand(0);
  MachineOperand &ImmSrc = MI.getOperand(1);
  MachineOperand &KeepSrc = MI.getOperand(2);

  // The value taken when SCC is set must be a K-immediate.
  if (!ImmSrc.isImm() || !isKImmOperand(ImmSrc))
    continue;

  // The value taken when SCC is clear must be a register.
  if (!KeepSrc.isReg())
    continue;

  const Register DestReg = Dest.getReg();
  const Register KeepReg = KeepSrc.getReg();

  // Before register allocation we cannot shrink yet. Hint that the kept
  // source and the destination should be allocated to the same register so
  // that the post-allocation SIShrinkInstructions run can form S_CMOVK_I32.
  if (DestReg.isVirtual()) {
    MRI->setRegAllocationHint(DestReg, 0, KeepReg);
    // Hints can only be recorded for virtual registers; the source may
    // already be a physical register even though the destination is not.
    if (KeepReg.isVirtual())
      MRI->setRegAllocationHint(KeepReg, 0, DestReg);
    continue;
  }

  // The kept source and the destination must be the same register.
  if (KeepReg != DestReg)
    continue;

  // NOTE(review): after this rewrite the instruction no longer lists the
  // destination register as a use, although S_CMOVK_I32 preserves its old
  // value when SCC is clear. Confirm that the S_CMOVK_I32 definition models
  // that read (e.g. via a tied operand); otherwise later liveness-based
  // passes may treat the previous definition as dead.
  // NOTE(review): the neighbouring K-immediate shrinks appear to re-normalise
  // the immediate after shrinking; confirm whether that is needed here too.
  MI.setDesc(TII->get(AMDGPU::S_CMOVK_I32));
  MI.removeOperand(2);
}

// Try to use S_ADDK_I32 and S_MULK_I32.
if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
MI.getOpcode() == AMDGPU::S_MUL_I32) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ entry:

; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
; GFX7 v_cmp_ne_u32
; GFX7: s_cselect_b32
; GFX7: s_cmovk_i32
; GFX8: s_cmp_lg_u32
; GFX8-NOT: v_cmp_ne_u32
; GFX8: s_cselect_b32
; GFX8: s_cmovk_i32
define amdgpu_kernel void @null_32bit_lds_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
%cmp = icmp ne ptr addrspace(3) %lds, null
%x = select i1 %cmp, i32 123, i32 456
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_bitcmp1_b32 s0, 0
; GCN-NEXT: s_movk_i32 s0, 0x80
; GCN-NEXT: s_cselect_b32 s0, s0, 0x83
; GCN-NEXT: s_cmovk_i32 s0, 0x83
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: flat_store_short v[0:1], v0
; GCN-NEXT: s_endpgm
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1786,10 +1786,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
; SI-NEXT: s_lshr_b32 s4, s4, 2
; SI-NEXT: s_add_i32 s4, s4, s6
; SI-NEXT: s_cmp_lt_i32 s5, 31
; SI-NEXT: s_cselect_b32 s4, s4, 0x7c00
; SI-NEXT: s_cmovk_i32 s4, 0x7c00
; SI-NEXT: s_cmp_lg_u32 s2, 0
; SI-NEXT: s_movk_i32 s2, 0x7e00
; SI-NEXT: s_cselect_b32 s2, s2, 0x7c00
; SI-NEXT: s_cmovk_i32 s2, 0x7c00
; SI-NEXT: s_cmpk_eq_i32 s5, 0x40f
; SI-NEXT: s_cselect_b32 s2, s2, s4
; SI-NEXT: s_lshr_b32 s3, s3, 16
Expand Down Expand Up @@ -1844,10 +1844,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
; VI-NEXT: s_lshr_b32 s1, s1, 2
; VI-NEXT: s_add_i32 s1, s1, s3
; VI-NEXT: s_cmp_lt_i32 s2, 31
; VI-NEXT: s_cselect_b32 s1, s1, 0x7c00
; VI-NEXT: s_cmovk_i32 s1, 0x7c00
; VI-NEXT: s_cmp_lg_u32 s0, 0
; VI-NEXT: s_movk_i32 s0, 0x7e00
; VI-NEXT: s_cselect_b32 s0, s0, 0x7c00
; VI-NEXT: s_cmovk_i32 s0, 0x7c00
; VI-NEXT: s_cmpk_eq_i32 s2, 0x40f
; VI-NEXT: s_cselect_b32 s0, s0, s1
; VI-NEXT: s_movk_i32 s1, 0x7fff
Expand Down Expand Up @@ -1896,10 +1896,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
; GFX9-NEXT: s_lshr_b32 s4, s4, 2
; GFX9-NEXT: s_add_i32 s4, s4, s5
; GFX9-NEXT: s_cmp_lt_i32 s3, 31
; GFX9-NEXT: s_cselect_b32 s4, s4, 0x7c00
; GFX9-NEXT: s_cmovk_i32 s4, 0x7c00
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
; GFX9-NEXT: s_movk_i32 s2, 0x7e00
; GFX9-NEXT: s_cselect_b32 s2, s2, 0x7c00
; GFX9-NEXT: s_cmovk_i32 s2, 0x7c00
; GFX9-NEXT: s_cmpk_eq_i32 s3, 0x40f
; GFX9-NEXT: s_cselect_b32 s2, s2, s4
; GFX9-NEXT: s_movk_i32 s3, 0x7fff
Expand Down Expand Up @@ -1958,11 +1958,11 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
; GFX11-NEXT: s_add_i32 s5, s5, s6
; GFX11-NEXT: s_cmp_lt_i32 s2, 31
; GFX11-NEXT: s_movk_i32 s6, 0x7e00
; GFX11-NEXT: s_cselect_b32 s5, s5, 0x7c00
; GFX11-NEXT: s_cmovk_i32 s5, 0x7c00
; GFX11-NEXT: s_cmp_lg_u32 s3, 0
; GFX11-NEXT: s_cselect_b32 s3, s6, 0x7c00
; GFX11-NEXT: s_cmovk_i32 s6, 0x7c00
; GFX11-NEXT: s_cmpk_eq_i32 s2, 0x40f
; GFX11-NEXT: s_cselect_b32 s2, s3, s5
; GFX11-NEXT: s_cselect_b32 s2, s6, s5
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, s2, v0
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/fptrunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,11 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; SI-NEXT: s_or_b32 s8, s8, s9
; SI-NEXT: s_add_i32 s6, s6, s8
; SI-NEXT: s_cmp_lt_i32 s0, 31
; SI-NEXT: s_cselect_b32 s6, s6, 0x7c00
; SI-NEXT: s_cmovk_i32 s6, 0x7c00
; SI-NEXT: s_cmp_lg_u32 s1, 0
; SI-NEXT: s_cselect_b32 s1, s2, 0x7c00
; SI-NEXT: s_cmovk_i32 s2, 0x7c00
; SI-NEXT: s_cmpk_eq_i32 s0, 0x40f
; SI-NEXT: s_cselect_b32 s0, s1, s6
; SI-NEXT: s_cselect_b32 s0, s2, s6
; SI-NEXT: s_lshr_b32 s1, s7, 16
; SI-NEXT: s_and_b32 s1, s1, 0x8000
; SI-NEXT: s_or_b32 s6, s1, s0
Expand Down Expand Up @@ -188,10 +188,10 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2
; VI-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s8
; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 31
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
; VI-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
; VI-SAFE-SDAG-NEXT: s_movk_i32 s4, 0x7e00
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
; VI-SAFE-SDAG-NEXT: s_cmovk_i32 s4, 0x7c00
; VI-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s6, 0x40f
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, s5
; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s7, 16
Expand Down Expand Up @@ -312,11 +312,11 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX10-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6
; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31
; GFX10-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
; GFX10-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
; GFX10-SAFE-SDAG-NEXT: s_cmovk_i32 s6, 0x7c00
; GFX10-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, s6, s5
; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16
; GFX10-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000
; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s3, s2
Expand Down Expand Up @@ -444,11 +444,11 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX11-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6
; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31
; GFX11-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
; GFX11-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
; GFX11-SAFE-SDAG-NEXT: s_cmovk_i32 s6, 0x7c00
; GFX11-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, s6, s5
; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16
; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1371,7 +1371,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa5
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX678-NEXT: s_cmovk_i32 s34, 0xa50
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1380,7 +1380,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa5
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX9-NEXT: s_cmovk_i32 s34, 0xa50
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1389,7 +1389,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa5
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX10-NEXT: s_cmovk_i32 s34, 0xa50
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1398,7 +1398,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa5
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
; GFX11-NEXT: s_cmovk_i32 s0, 0xa50
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
Expand All @@ -1413,7 +1413,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa50
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX678-NEXT: s_cmovk_i32 s34, 0xa5
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1422,7 +1422,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa50
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX9-NEXT: s_cmovk_i32 s34, 0xa5
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1431,7 +1431,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa50
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX10-NEXT: s_cmovk_i32 s34, 0xa5
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1440,7 +1440,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa50
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5
; GFX11-NEXT: s_cmovk_i32 s0, 0xa5
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
Expand Down
75 changes: 75 additions & 0 deletions llvm/test/CodeGen/AMDGPU/shrink-select.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -start-before=si-shrink-instructions -stop-before=si-post-ra-bundler -o - %s | FileCheck -check-prefix=GCN %s

---
name: shrink-select-hint
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: shrink-select-hint
; GCN: renamable $sgpr0 = S_MOV_B32 0
; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 0
; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 0
; GCN-NEXT: renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
; GCN-NEXT: renamable $sgpr2 = S_CMOVK_I32 31744, implicit killed $scc
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2, implicit killed renamable $sgpr0
%0:sgpr_32 = S_MOV_B32 0
%1:sgpr_32 = S_MOV_B32 0
%2:sgpr_32 = S_MOV_B32 0
%3:sgpr_32 = S_ADD_U32 killed %0, %1, implicit-def $scc
%4:sgpr_32 = S_CSELECT_B32 %2, 31744, implicit $scc
S_ENDPGM 0, implicit %4, implicit %3
...
---
name: shrink-select-b32
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: shrink-select-b32
; GCN: renamable $sgpr0 = S_CMOVK_I32 31744, implicit undef $scc
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0
%0:sgpr_32 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
%1:sgpr_32 = S_CSELECT_B32 %0, 31744, implicit $scc
S_ENDPGM 0, implicit %1
...
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also the b64 versions, and tests with exotic operand types

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I expanded the tests to include b64. For operand types, an assertion fired when I tried to use a floating point operand, but I did add a case where the second operand is a register and one where the second operand is an immediate but not a K-Immediate.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't use FP immediate, only ever use isImm

---
name: shrink-select-b64
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: shrink-select-b64
; GCN: renamable $sgpr0_sgpr1 = S_CSELECT_B64 undef renamable $sgpr0_sgpr1, 31744, implicit undef $scc
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1
%0:sgpr_64 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
%1:sgpr_64 = S_CSELECT_B64 %0, 31744, implicit $scc
S_ENDPGM 0, implicit %1
...
---
name: shrink-select-non-kimm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: shrink-select-non-kimm
; GCN: renamable $sgpr0 = S_CSELECT_B32 undef renamable $sgpr0, 16, implicit undef $scc
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0
%0:sgpr_32 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
%1:sgpr_32 = S_CSELECT_B32 %0, 16, implicit $scc
S_ENDPGM 0, implicit %1
...
---
name: shrink-select-reg
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: shrink-select-reg
; GCN: renamable $sgpr0 = S_CSELECT_B32 undef renamable $sgpr0, undef renamable $sgpr0, implicit undef $scc
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0
%0:sgpr_32 = IMPLICIT_DEF
%1:sgpr_32 = IMPLICIT_DEF
$scc = IMPLICIT_DEF
%2:sgpr_32 = S_CSELECT_B32 %0, %1, implicit $scc
S_ENDPGM 0, implicit %2
...