Skip to content

Commit 46ec456

Browse files
committed
[AMDGPU] Use s_cmovk_i32 instead of s_cselect_b32 when applicable
Partially addresses #129984.
1 parent 5d344ac commit 46ec456

File tree

7 files changed

+62
-27
lines changed

7 files changed

+62
-27
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,41 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
883883
}
884884
}
885885

886+
// Try to use S_CMOVK_I32
887+
if (MI.getOpcode() == AMDGPU::S_CSELECT_B32) {
888+
const MachineOperand *Dest = &MI.getOperand(0);
889+
MachineOperand *Src0 = &MI.getOperand(1);
890+
MachineOperand *Src1 = &MI.getOperand(2);
891+
// Exactly one of the two sources must be a register (the other is the immediate candidate)
892+
if (!(Src0->isReg() ^ Src1->isReg()))
893+
continue;
894+
895+
bool swapped = false;
896+
// Only swap the local operand pointers here, not the MachineOperands.
897+
// Commuting the instruction now would modify the generated program
898+
// even in cases where we don't end up emitting an S_CMOVK_I32.
899+
if (!Src0->isReg() && Src1->isReg()) {
900+
swapped = true;
901+
std::swap(Src0, Src1);
902+
}
903+
904+
if (!(Src1->isImm() && isKImmOperand(*Src1)))
905+
continue;
906+
907+
if (Src0->getReg() != Dest->getReg())
908+
continue;
909+
910+
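// Actually commute the operands in the MachineInstr now that we know we
911+
// are going through with the shrink.
// NOTE(review): S_CSELECT_B32 yields src0 when SCC is set, whereas
// S_CMOVK_I32 writes its immediate when SCC is set. The un-swapped form
// (dst = SCC ? dst : imm) therefore appears to select on the opposite
// condition after the rewrite - confirm against the ISA reference.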
912+
if (swapped) {
913+
if (!TII->commuteInstruction(MI, false, 1, 2))
914+
continue;
915+
}
916+
917+
MI.setDesc(TII->get(AMDGPU::S_CMOVK_I32));
918+
MI.removeOperand(1);
919+
}
920+
886921
// Try to use S_ADDK_I32 and S_MULK_I32.
887922
if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
888923
MI.getOpcode() == AMDGPU::S_MUL_I32) {

llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ entry:
5858

5959
; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
6060
; GFX7 v_cmp_ne_u32
61-
; GFX7: s_cselect_b32
61+
; GFX7: s_cmovk_i32
6262
; GFX8: s_cmp_lg_u32
6363
; GFX8-NOT: v_cmp_ne_u32
64-
; GFX8: s_cselect_b32
64+
; GFX8: s_cmovk_i32
6565
define amdgpu_kernel void @null_32bit_lds_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
6666
%cmp = icmp ne ptr addrspace(3) %lds, null
6767
%x = select i1 %cmp, i32 123, i32 456

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
399399
; GCN-NEXT: s_waitcnt lgkmcnt(0)
400400
; GCN-NEXT: s_bitcmp1_b32 s0, 0
401401
; GCN-NEXT: s_movk_i32 s0, 0x80
402-
; GCN-NEXT: s_cselect_b32 s0, s0, 0x83
402+
; GCN-NEXT: s_cmovk_i32 s0, 0x83
403403
; GCN-NEXT: v_mov_b32_e32 v0, s0
404404
; GCN-NEXT: flat_store_short v[0:1], v0
405405
; GCN-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,10 +1786,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
17861786
; SI-NEXT: s_lshr_b32 s4, s4, 2
17871787
; SI-NEXT: s_add_i32 s4, s4, s6
17881788
; SI-NEXT: s_cmp_lt_i32 s5, 31
1789-
; SI-NEXT: s_cselect_b32 s4, s4, 0x7c00
1789+
; SI-NEXT: s_cmovk_i32 s4, 0x7c00
17901790
; SI-NEXT: s_cmp_lg_u32 s2, 0
17911791
; SI-NEXT: s_movk_i32 s2, 0x7e00
1792-
; SI-NEXT: s_cselect_b32 s2, s2, 0x7c00
1792+
; SI-NEXT: s_cmovk_i32 s2, 0x7c00
17931793
; SI-NEXT: s_cmpk_eq_i32 s5, 0x40f
17941794
; SI-NEXT: s_cselect_b32 s2, s2, s4
17951795
; SI-NEXT: s_lshr_b32 s3, s3, 16
@@ -1844,10 +1844,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
18441844
; VI-NEXT: s_lshr_b32 s1, s1, 2
18451845
; VI-NEXT: s_add_i32 s1, s1, s3
18461846
; VI-NEXT: s_cmp_lt_i32 s2, 31
1847-
; VI-NEXT: s_cselect_b32 s1, s1, 0x7c00
1847+
; VI-NEXT: s_cmovk_i32 s1, 0x7c00
18481848
; VI-NEXT: s_cmp_lg_u32 s0, 0
18491849
; VI-NEXT: s_movk_i32 s0, 0x7e00
1850-
; VI-NEXT: s_cselect_b32 s0, s0, 0x7c00
1850+
; VI-NEXT: s_cmovk_i32 s0, 0x7c00
18511851
; VI-NEXT: s_cmpk_eq_i32 s2, 0x40f
18521852
; VI-NEXT: s_cselect_b32 s0, s0, s1
18531853
; VI-NEXT: s_movk_i32 s1, 0x7fff
@@ -1896,10 +1896,10 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
18961896
; GFX9-NEXT: s_lshr_b32 s4, s4, 2
18971897
; GFX9-NEXT: s_add_i32 s4, s4, s5
18981898
; GFX9-NEXT: s_cmp_lt_i32 s3, 31
1899-
; GFX9-NEXT: s_cselect_b32 s4, s4, 0x7c00
1899+
; GFX9-NEXT: s_cmovk_i32 s4, 0x7c00
19001900
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
19011901
; GFX9-NEXT: s_movk_i32 s2, 0x7e00
1902-
; GFX9-NEXT: s_cselect_b32 s2, s2, 0x7c00
1902+
; GFX9-NEXT: s_cmovk_i32 s2, 0x7c00
19031903
; GFX9-NEXT: s_cmpk_eq_i32 s3, 0x40f
19041904
; GFX9-NEXT: s_cselect_b32 s2, s2, s4
19051905
; GFX9-NEXT: s_movk_i32 s3, 0x7fff
@@ -1958,7 +1958,7 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
19581958
; GFX11-NEXT: s_add_i32 s5, s5, s6
19591959
; GFX11-NEXT: s_cmp_lt_i32 s2, 31
19601960
; GFX11-NEXT: s_movk_i32 s6, 0x7e00
1961-
; GFX11-NEXT: s_cselect_b32 s5, s5, 0x7c00
1961+
; GFX11-NEXT: s_cmovk_i32 s5, 0x7c00
19621962
; GFX11-NEXT: s_cmp_lg_u32 s3, 0
19631963
; GFX11-NEXT: s_cselect_b32 s3, s6, 0x7c00
19641964
; GFX11-NEXT: s_cmpk_eq_i32 s2, 0x40f

llvm/test/CodeGen/AMDGPU/fptrunc.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
132132
; SI-NEXT: s_or_b32 s8, s8, s9
133133
; SI-NEXT: s_add_i32 s6, s6, s8
134134
; SI-NEXT: s_cmp_lt_i32 s0, 31
135-
; SI-NEXT: s_cselect_b32 s6, s6, 0x7c00
135+
; SI-NEXT: s_cmovk_i32 s6, 0x7c00
136136
; SI-NEXT: s_cmp_lg_u32 s1, 0
137137
; SI-NEXT: s_cselect_b32 s1, s2, 0x7c00
138138
; SI-NEXT: s_cmpk_eq_i32 s0, 0x40f
@@ -188,10 +188,10 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
188188
; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2
189189
; VI-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s8
190190
; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 31
191-
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
191+
; VI-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
192192
; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
193193
; VI-SAFE-SDAG-NEXT: s_movk_i32 s4, 0x7e00
194-
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
194+
; VI-SAFE-SDAG-NEXT: s_cmovk_i32 s4, 0x7c00
195195
; VI-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s6, 0x40f
196196
; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, s5
197197
; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s7, 16
@@ -240,7 +240,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
240240
; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
241241
; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
242242
; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
243-
; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
243+
; VI-SAFE-GISEL-NEXT: s_cmovk_i32 s2, 0x7c00
244244
; VI-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
245245
; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
246246
; VI-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
@@ -312,7 +312,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
312312
; GFX10-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6
313313
; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31
314314
; GFX10-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
315-
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
315+
; GFX10-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
316316
; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
317317
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
318318
; GFX10-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
@@ -365,7 +365,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
365365
; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
366366
; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
367367
; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
368-
; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
368+
; GFX10-SAFE-GISEL-NEXT: s_cmovk_i32 s2, 0x7c00
369369
; GFX10-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
370370
; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
371371
; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
@@ -444,7 +444,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
444444
; GFX11-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6
445445
; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31
446446
; GFX11-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
447-
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
447+
; GFX11-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
448448
; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
449449
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
450450
; GFX11-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
@@ -501,7 +501,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
501501
; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
502502
; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
503503
; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
504-
; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
504+
; GFX11-SAFE-GISEL-NEXT: s_cmovk_i32 s2, 0x7c00
505505
; GFX11-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
506506
; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
507507
; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16

llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,7 +1371,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13711371
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13721372
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
13731373
; GFX678-NEXT: s_movk_i32 s34, 0xa5
1374-
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
1374+
; GFX678-NEXT: s_cmovk_i32 s34, 0xa50
13751375
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13761376
; GFX678-NEXT: s_setpc_b64 s[30:31]
13771377
;
@@ -1380,7 +1380,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13801380
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13811381
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
13821382
; GFX9-NEXT: s_movk_i32 s34, 0xa5
1383-
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
1383+
; GFX9-NEXT: s_cmovk_i32 s34, 0xa50
13841384
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13851385
; GFX9-NEXT: s_setpc_b64 s[30:31]
13861386
;
@@ -1389,7 +1389,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13891389
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13901390
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
13911391
; GFX10-NEXT: s_movk_i32 s34, 0xa5
1392-
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
1392+
; GFX10-NEXT: s_cmovk_i32 s34, 0xa50
13931393
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13941394
; GFX10-NEXT: s_setpc_b64 s[30:31]
13951395
;
@@ -1398,7 +1398,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13981398
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13991399
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
14001400
; GFX11-NEXT: s_movk_i32 s0, 0xa5
1401-
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
1401+
; GFX11-NEXT: s_cmovk_i32 s0, 0xa50
14021402
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
14031403
; GFX11-NEXT: s_setpc_b64 s[30:31]
14041404
%cmp = icmp eq i32 %cond, 0
@@ -1413,7 +1413,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14131413
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14141414
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
14151415
; GFX678-NEXT: s_movk_i32 s34, 0xa50
1416-
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5
1416+
; GFX678-NEXT: s_cmovk_i32 s34, 0xa5
14171417
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14181418
; GFX678-NEXT: s_setpc_b64 s[30:31]
14191419
;
@@ -1422,7 +1422,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14221422
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14231423
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
14241424
; GFX9-NEXT: s_movk_i32 s34, 0xa50
1425-
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5
1425+
; GFX9-NEXT: s_cmovk_i32 s34, 0xa5
14261426
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14271427
; GFX9-NEXT: s_setpc_b64 s[30:31]
14281428
;
@@ -1431,7 +1431,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14311431
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14321432
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
14331433
; GFX10-NEXT: s_movk_i32 s34, 0xa50
1434-
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5
1434+
; GFX10-NEXT: s_cmovk_i32 s34, 0xa5
14351435
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14361436
; GFX10-NEXT: s_setpc_b64 s[30:31]
14371437
;
@@ -1440,7 +1440,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14401440
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14411441
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
14421442
; GFX11-NEXT: s_movk_i32 s0, 0xa50
1443-
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5
1443+
; GFX11-NEXT: s_cmovk_i32 s0, 0xa5
14441444
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
14451445
; GFX11-NEXT: s_setpc_b64 s[30:31]
14461446
%cmp = icmp eq i32 %cond, 0

llvm/test/CodeGen/AMDGPU/shrink-select.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ tracksRegLiveness: true
2626
body: |
2727
bb.0:
2828
; GCN-LABEL: name: shrink-select
29-
; GCN: renamable $sgpr0 = S_CSELECT_B32 undef renamable $sgpr0, 31744, implicit undef $scc
29+
; GCN: renamable $sgpr0 = S_CMOVK_I32 31744, implicit undef $scc
3030
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0
3131
%0:sgpr_32 = IMPLICIT_DEF
3232
$scc = IMPLICIT_DEF

0 commit comments

Comments
 (0)