Skip to content

Commit 1fc8f32

Browse files
committed
[AMDGPU] Use s_cmovk_i32 instead of s_cselect_b32 when applicable
Addresses #129984.
1 parent 3954d25 commit 1fc8f32

File tree

5 files changed

+58
-14
lines changed

5 files changed

+58
-14
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,50 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
883883
}
884884
}
885885

886+
// Try to use S_CMOVK_I32
887+
if (MI.getOpcode() == AMDGPU::S_CSELECT_B32) {
888+
const MachineOperand *Dest = &MI.getOperand(0);
889+
MachineOperand *Src0 = &MI.getOperand(1);
890+
MachineOperand *Src1 = &MI.getOperand(2);
891+
// Must be exactly one Immediate
892+
if (!(Src0->isReg() ^ Src1->isReg()))
893+
continue;
894+
895+
bool swapped = false;
896+
// Don't actually swap the MachineOperands yet.
897+
// We could do it now, but doing so would modify the generated
898+
// program even in cases where we don't insert an S_CMOVK_I32.
899+
if (!Src0->isReg() && Src1->isReg()) {
900+
swapped = true;
901+
std::swap(Src0, Src1);
902+
}
903+
904+
if (!(Src1->isImm() && isKImmOperand(*Src1)))
905+
continue;
906+
907+
// Hint that the source and destination register should be allocated
908+
// as the same register so that we can shrink to S_CMOVK_I32 on the
909+
// post-allocation SIShrinkInstructions pass.
910+
if (Dest->getReg().isVirtual()) {
911+
MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
912+
MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
913+
continue;
914+
}
915+
916+
if (Src0->getReg() != Dest->getReg())
917+
continue;
918+
919+
// Actually swap the operands in the MachineInstr now that we know we
920+
// are going through with the shrink
921+
if (swapped) {
922+
if (!TII->commuteInstruction(MI, false, 1, 2))
923+
continue;
924+
}
925+
926+
MI.setDesc(TII->get(AMDGPU::S_CMOVK_I32));
927+
MI.removeOperand(1);
928+
}
929+
886930
// Try to use S_ADDK_I32 and S_MULK_I32.
887931
if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
888932
MI.getOpcode() == AMDGPU::S_MUL_I32) {

llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ entry:
5858

5959
; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
6060
; GFX7 v_cmp_ne_u32
61-
; GFX7: s_cselect_b32
61+
; GFX7: s_cmovk_i32
6262
; GFX8: s_cmp_lg_u32
6363
; GFX8-NOT: v_cmp_ne_u32
64-
; GFX8: s_cselect_b32
64+
; GFX8: s_cmovk_i32
6565
define amdgpu_kernel void @null_32bit_lds_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
6666
%cmp = icmp ne ptr addrspace(3) %lds, null
6767
%x = select i1 %cmp, i32 123, i32 456

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
396396
; GCN-NEXT: s_waitcnt lgkmcnt(0)
397397
; GCN-NEXT: s_bitcmp1_b32 s0, 0
398398
; GCN-NEXT: s_movk_i32 s0, 0x80
399-
; GCN-NEXT: s_cselect_b32 s0, s0, 0x83
399+
; GCN-NEXT: s_cmovk_i32 s0, 0x83
400400
; GCN-NEXT: v_mov_b32_e32 v0, s0
401401
; GCN-NEXT: flat_store_short v[0:1], v0
402402
; GCN-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/fptrunc.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
242242
; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
243243
; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
244244
; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
245-
; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
245+
; VI-SAFE-GISEL-NEXT: s_cmovk_i32 s2, 0x7c00
246246
; VI-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
247247
; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
248248
; VI-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
@@ -367,7 +367,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
367367
; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
368368
; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
369369
; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
370-
; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
370+
; GFX10-SAFE-GISEL-NEXT: s_cmovk_i32 s2, 0x7c00
371371
; GFX10-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
372372
; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
373373
; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
@@ -502,7 +502,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
502502
; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
503503
; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
504504
; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
505-
; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
505+
; GFX11-SAFE-GISEL-NEXT: s_cmovk_i32 s2, 0x7c00
506506
; GFX11-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
507507
; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
508508
; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16

llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,7 +1371,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13711371
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13721372
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
13731373
; GFX678-NEXT: s_movk_i32 s34, 0xa5
1374-
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
1374+
; GFX678-NEXT: s_cmovk_i32 s34, 0xa50
13751375
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13761376
; GFX678-NEXT: s_setpc_b64 s[30:31]
13771377
;
@@ -1380,7 +1380,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13801380
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13811381
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
13821382
; GFX9-NEXT: s_movk_i32 s34, 0xa5
1383-
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
1383+
; GFX9-NEXT: s_cmovk_i32 s34, 0xa50
13841384
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13851385
; GFX9-NEXT: s_setpc_b64 s[30:31]
13861386
;
@@ -1389,7 +1389,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13891389
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13901390
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
13911391
; GFX10-NEXT: s_movk_i32 s34, 0xa5
1392-
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
1392+
; GFX10-NEXT: s_cmovk_i32 s34, 0xa50
13931393
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
13941394
; GFX10-NEXT: s_setpc_b64 s[30:31]
13951395
;
@@ -1398,7 +1398,7 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
13981398
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13991399
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
14001400
; GFX11-NEXT: s_movk_i32 s0, 0xa5
1401-
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
1401+
; GFX11-NEXT: s_cmovk_i32 s0, 0xa50
14021402
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
14031403
; GFX11-NEXT: s_setpc_b64 s[30:31]
14041404
%cmp = icmp eq i32 %cond, 0
@@ -1413,7 +1413,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14131413
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14141414
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
14151415
; GFX678-NEXT: s_movk_i32 s34, 0xa50
1416-
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5
1416+
; GFX678-NEXT: s_cmovk_i32 s34, 0xa5
14171417
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14181418
; GFX678-NEXT: s_setpc_b64 s[30:31]
14191419
;
@@ -1422,7 +1422,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14221422
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14231423
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
14241424
; GFX9-NEXT: s_movk_i32 s34, 0xa50
1425-
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5
1425+
; GFX9-NEXT: s_cmovk_i32 s34, 0xa5
14261426
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14271427
; GFX9-NEXT: s_setpc_b64 s[30:31]
14281428
;
@@ -1431,7 +1431,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14311431
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14321432
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
14331433
; GFX10-NEXT: s_movk_i32 s34, 0xa50
1434-
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5
1434+
; GFX10-NEXT: s_cmovk_i32 s34, 0xa5
14351435
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
14361436
; GFX10-NEXT: s_setpc_b64 s[30:31]
14371437
;
@@ -1440,7 +1440,7 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
14401440
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14411441
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
14421442
; GFX11-NEXT: s_movk_i32 s0, 0xa50
1443-
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5
1443+
; GFX11-NEXT: s_cmovk_i32 s0, 0xa5
14441444
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
14451445
; GFX11-NEXT: s_setpc_b64 s[30:31]
14461446
%cmp = icmp eq i32 %cond, 0

0 commit comments

Comments
 (0)