Skip to content

Commit 857dc8d

Browse files
committed
[AMDGPU] Use s_cmovk_i32 instead of s_cselect_b32 when applicable
Partially addresses #129984.
1 parent 3c1c3f2 commit 857dc8d

File tree

7 files changed

+46
-24
lines changed

7 files changed

+46
-24
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,28 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
883883
}
884884
}
885885

886+
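// Try to use S_CMOVK_I32 in place of S_CSELECT_B32.
// NOTE(review): per the AMD ISA documentation, S_CSELECT_B32 computes
// D = SCC ? S0 : S1, whereas S_CMOVK_I32 writes the 16-bit immediate to D
// only when SCC is set (D is left unchanged otherwise). With Src0 == Dest and
// Src1 as the immediate (the case matched below), the original instruction
// writes the immediate when SCC is *clear*, so this rewrite appears to invert
// the select condition -- confirm. The equivalent case would presumably be
// Src1 == Dest with Src0 as the K-immediate.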
887+
if (MI.getOpcode() == AMDGPU::S_CSELECT_B32) {
888+
const MachineOperand *Dest = &MI.getOperand(0);
889+
MachineOperand *Src0 = &MI.getOperand(1);
890+
MachineOperand *Src1 = &MI.getOperand(2);
891+
892+
// First source must be a register
893+
if (!Src0->isReg())
894+
continue;
895+
896+
// Second source must be a K-immediate
897+
if (!Src1->isImm() || !isKImmOperand(*Src1))
898+
continue;
899+
900+
// The first source and destination must be the same register
901+
if (Src0->getReg() != Dest->getReg())
902+
continue;
903+
904+
MI.setDesc(TII->get(AMDGPU::S_CMOVK_I32));
905+
MI.removeOperand(1);
906+
}
907+
886908
// Try to use S_ADDK_I32 and S_MULK_I32.
887909
if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
888910
MI.getOpcode() == AMDGPU::S_MUL_I32) {

llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ entry:
5858

5959
; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
6060
; GFX7 v_cmp_ne_u32
61-
; GFX7: s_cselect_b32
61+
; GFX7: s_cmovk_i32
6262
; GFX8: s_cmp_lg_u32
6363
; GFX8-NOT: v_cmp_ne_u32
64-
; GFX8: s_cselect_b32
64+
; GFX8: s_cmovk_i32
6565
define amdgpu_kernel void @null_32bit_lds_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
6666
%cmp = icmp ne ptr addrspace(3) %lds, null
6767
%x = select i1 %cmp, i32 123, i32 456

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
399399
; GCN-NEXT: s_waitcnt lgkmcnt(0)
400400
; GCN-NEXT: s_bitcmp1_b32 s0, 0
401401
; GCN-NEXT: s_movk_i32 s0, 0x80
402-
; GCN-NEXT: s_cselect_b32 s0, s0, 0x83
402+
; GCN-NEXT: s_cmovk_i32 s0, 0x83
403403
; GCN-NEXT: v_mov_b32_e32 v0, s0
404404
; GCN-NEXT: flat_store_short v[0:1], v0
405405
; GCN-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,10 +1786,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
17861786
; SI-NEXT: s_lshr_b32 s4, s4, 2
17871787
; SI-NEXT: s_add_i32 s4, s4, s6
17881788
; SI-NEXT: s_cmp_lt_i32 s5, 31
1789-
; SI-NEXT: s_cselect_b32 s4, s4, 0x7c00
1789+
; SI-NEXT: s_cmovk_i32 s4, 0x7c00
17901790
; SI-NEXT: s_cmp_lg_u32 s2, 0
17911791
; SI-NEXT: s_movk_i32 s2, 0x7e00
1792-
; SI-NEXT: s_cselect_b32 s2, s2, 0x7c00
1792+
; SI-NEXT: s_cmovk_i32 s2, 0x7c00
17931793
; SI-NEXT: s_cmpk_eq_i32 s5, 0x40f
17941794
; SI-NEXT: s_cselect_b32 s2, s2, s4
17951795
; SI-NEXT: s_lshr_b32 s3, s3, 16
@@ -1844,10 +1844,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
18441844
; VI-NEXT: s_lshr_b32 s1, s1, 2
18451845
; VI-NEXT: s_add_i32 s1, s1, s3
18461846
; VI-NEXT: s_cmp_lt_i32 s2, 31
1847-
; VI-NEXT: s_cselect_b32 s1, s1, 0x7c00
1847+
; VI-NEXT: s_cmovk_i32 s1, 0x7c00
18481848
; VI-NEXT: s_cmp_lg_u32 s0, 0
18491849
; VI-NEXT: s_movk_i32 s0, 0x7e00
1850-
; VI-NEXT: s_cselect_b32 s0, s0, 0x7c00
1850+
; VI-NEXT: s_cmovk_i32 s0, 0x7c00
18511851
; VI-NEXT: s_cmpk_eq_i32 s2, 0x40f
18521852
; VI-NEXT: s_cselect_b32 s0, s0, s1
18531853
; VI-NEXT: s_movk_i32 s1, 0x7fff
@@ -1896,10 +1896,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
18961896
; GFX9-NEXT: s_lshr_b32 s4, s4, 2
18971897
; GFX9-NEXT: s_add_i32 s4, s4, s5
18981898
; GFX9-NEXT: s_cmp_lt_i32 s3, 31
1899-
; GFX9-NEXT: s_cselect_b32 s4, s4, 0x7c00
1899+
; GFX9-NEXT: s_cmovk_i32 s4, 0x7c00
19001900
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
19011901
; GFX9-NEXT: s_movk_i32 s2, 0x7e00
1902-
; GFX9-NEXT: s_cselect_b32 s2, s2, 0x7c00
1902+
; GFX9-NEXT: s_cmovk_i32 s2, 0x7c00
19031903
; GFX9-NEXT: s_cmpk_eq_i32 s3, 0x40f
19041904
; GFX9-NEXT: s_cselect_b32 s2, s2, s4
19051905
; GFX9-NEXT: s_movk_i32 s3, 0x7fff
@@ -1958,7 +1958,7 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
19581958
; GFX11-NEXT: s_add_i32 s5, s5, s6
19591959
; GFX11-NEXT: s_cmp_lt_i32 s2, 31
19601960
; GFX11-NEXT: s_movk_i32 s6, 0x7e00
1961-
; GFX11-NEXT: s_cselect_b32 s5, s5, 0x7c00
1961+
; GFX11-NEXT: s_cmovk_i32 s5, 0x7c00
19621962
; GFX11-NEXT: s_cmp_lg_u32 s3, 0
19631963
; GFX11-NEXT: s_cselect_b32 s3, s6, 0x7c00
19641964
; GFX11-NEXT: s_cmpk_eq_i32 s2, 0x40f

llvm/test/CodeGen/AMDGPU/fptrunc.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
132132
; SI-NEXT: s_or_b32 s8, s8, s9
133133
; SI-NEXT: s_add_i32 s6, s6, s8
134134
; SI-NEXT: s_cmp_lt_i32 s0, 31
135-
; SI-NEXT: s_cselect_b32 s6, s6, 0x7c00
135+
; SI-NEXT: s_cmovk_i32 s6, 0x7c00
136136
; SI-NEXT: s_cmp_lg_u32 s1, 0
137137
; SI-NEXT: s_cselect_b32 s1, s2, 0x7c00
138138
; SI-NEXT: s_cmpk_eq_i32 s0, 0x40f
@@ -188,10 +188,10 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
188188
; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2
189189
; VI-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s8
190190
; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 31
191-
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
191+
; VI-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
192192
; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
193193
; VI-SAFE-SDAG-NEXT: s_movk_i32 s4, 0x7e00
194-
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
194+
; VI-SAFE-SDAG-NEXT: s_cmovk_i32 s4, 0x7c00
195195
; VI-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s6, 0x40f
196196
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, s5
197197
; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s7, 16
@@ -312,7 +312,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
312312
; GFX10-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6
313313
; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31
314314
; GFX10-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
315-
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
315+
; GFX10-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
316316
; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
317317
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
318318
; GFX10-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
@@ -444,7 +444,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
444444
; GFX11-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6
445445
; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31
446446
; GFX11-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
447-
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
447+
; GFX11-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
448448
; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
449449
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
450450
; GFX11-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f

llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,7 +1371,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13711371
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13721372
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
13731373
; GFX678-NEXT: s_movk_i32 s34, 0xa5
1374-
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
1374+
; GFX678-NEXT: s_cmovk_i32 s34, 0xa50
13751375
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13761376
; GFX678-NEXT: s_setpc_b64 s[30:31]
13771377
;
@@ -1380,7 +1380,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13801380
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13811381
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
13821382
; GFX9-NEXT: s_movk_i32 s34, 0xa5
1383-
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
1383+
; GFX9-NEXT: s_cmovk_i32 s34, 0xa50
13841384
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13851385
; GFX9-NEXT: s_setpc_b64 s[30:31]
13861386
;
@@ -1389,7 +1389,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13891389
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13901390
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
13911391
; GFX10-NEXT: s_movk_i32 s34, 0xa5
1392-
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
1392+
; GFX10-NEXT: s_cmovk_i32 s34, 0xa50
13931393
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13941394
; GFX10-NEXT: s_setpc_b64 s[30:31]
13951395
;
@@ -1398,7 +1398,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13981398
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13991399
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
14001400
; GFX11-NEXT: s_movk_i32 s0, 0xa5
1401-
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
1401+
; GFX11-NEXT: s_cmovk_i32 s0, 0xa50
14021402
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
14031403
; GFX11-NEXT: s_setpc_b64 s[30:31]
14041404
%cmp = icmp eq i32 %cond, 0
@@ -1413,7 +1413,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14131413
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14141414
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
14151415
; GFX678-NEXT: s_movk_i32 s34, 0xa50
1416-
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5
1416+
; GFX678-NEXT: s_cmovk_i32 s34, 0xa5
14171417
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14181418
; GFX678-NEXT: s_setpc_b64 s[30:31]
14191419
;
@@ -1422,7 +1422,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14221422
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14231423
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
14241424
; GFX9-NEXT: s_movk_i32 s34, 0xa50
1425-
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5
1425+
; GFX9-NEXT: s_cmovk_i32 s34, 0xa5
14261426
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14271427
; GFX9-NEXT: s_setpc_b64 s[30:31]
14281428
;
@@ -1431,7 +1431,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14311431
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14321432
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
14331433
; GFX10-NEXT: s_movk_i32 s34, 0xa50
1434-
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5
1434+
; GFX10-NEXT: s_cmovk_i32 s34, 0xa5
14351435
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14361436
; GFX10-NEXT: s_setpc_b64 s[30:31]
14371437
;
@@ -1440,7 +1440,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14401440
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14411441
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
14421442
; GFX11-NEXT: s_movk_i32 s0, 0xa50
1443-
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5
1443+
; GFX11-NEXT: s_cmovk_i32 s0, 0xa5
14441444
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
14451445
; GFX11-NEXT: s_setpc_b64 s[30:31]
14461446
%cmp = icmp eq i32 %cond, 0

llvm/test/CodeGen/AMDGPU/shrink-select.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ tracksRegLiveness: true
2626
body: |
2727
bb.0:
2828
; GCN-LABEL: name: shrink-select-b32
29-
; GCN: renamable $sgpr0 = S_CSELECT_B32 undef renamable $sgpr0, 31744, implicit undef $scc
29+
; GCN: renamable $sgpr0 = S_CMOVK_I32 31744, implicit undef $scc
3030
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0
3131
%0:sgpr_32 = IMPLICIT_DEF
3232
$scc = IMPLICIT_DEF

0 commit comments

Comments
 (0)