Skip to content

Commit c3d3223

Browse files
committed
[AMDGPU] Hint that the s_cselect_b32 source-0 and destination should ideally be assigned the same physical register.
Addresses #129984.
1 parent 857dc8d commit c3d3223

File tree

4 files changed

+19
-10
lines changed

4 files changed

+19
-10
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,15 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
897897
if (!Src1->isImm() || !isKImmOperand(*Src1))
898898
continue;
899899

900+
// Hint that the first source and destination registers should be allocated
901+
// as the same register so that we can shrink to S_CMOVK_I32 on the
902+
// post-allocation SIShrinkInstructions pass.
903+
if (Dest->getReg().isVirtual()) {
904+
MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
905+
MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
906+
continue;
907+
}
908+
900909
// The first source and destination must be the same register
901910
if (Src0->getReg() != Dest->getReg())
902911
continue;

llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1960,9 +1960,9 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
19601960
; GFX11-NEXT: s_movk_i32 s6, 0x7e00
19611961
; GFX11-NEXT: s_cmovk_i32 s5, 0x7c00
19621962
; GFX11-NEXT: s_cmp_lg_u32 s3, 0
1963-
; GFX11-NEXT: s_cselect_b32 s3, s6, 0x7c00
1963+
; GFX11-NEXT: s_cmovk_i32 s6, 0x7c00
19641964
; GFX11-NEXT: s_cmpk_eq_i32 s2, 0x40f
1965-
; GFX11-NEXT: s_cselect_b32 s2, s3, s5
1965+
; GFX11-NEXT: s_cselect_b32 s2, s6, s5
19661966
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
19671967
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, s2, v0
19681968
; GFX11-NEXT: global_store_b16 v1, v0, s[0:1]

llvm/test/CodeGen/AMDGPU/fptrunc.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,9 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
134134
; SI-NEXT: s_cmp_lt_i32 s0, 31
135135
; SI-NEXT: s_cmovk_i32 s6, 0x7c00
136136
; SI-NEXT: s_cmp_lg_u32 s1, 0
137-
; SI-NEXT: s_cselect_b32 s1, s2, 0x7c00
137+
; SI-NEXT: s_cmovk_i32 s2, 0x7c00
138138
; SI-NEXT: s_cmpk_eq_i32 s0, 0x40f
139-
; SI-NEXT: s_cselect_b32 s0, s1, s6
139+
; SI-NEXT: s_cselect_b32 s0, s2, s6
140140
; SI-NEXT: s_lshr_b32 s1, s7, 16
141141
; SI-NEXT: s_and_b32 s1, s1, 0x8000
142142
; SI-NEXT: s_or_b32 s6, s1, s0
@@ -314,9 +314,9 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
314314
; GFX10-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
315315
; GFX10-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
316316
; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
317-
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
317+
; GFX10-SAFE-SDAG-NEXT: s_cmovk_i32 s6, 0x7c00
318318
; GFX10-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
319-
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5
319+
; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, s6, s5
320320
; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16
321321
; GFX10-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000
322322
; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s3, s2
@@ -446,9 +446,9 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
446446
; GFX11-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
447447
; GFX11-SAFE-SDAG-NEXT: s_cmovk_i32 s5, 0x7c00
448448
; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
449-
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
449+
; GFX11-SAFE-SDAG-NEXT: s_cmovk_i32 s6, 0x7c00
450450
; GFX11-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
451-
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5
451+
; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, s6, s5
452452
; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16
453453
; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
454454
; GFX11-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000

llvm/test/CodeGen/AMDGPU/shrink-select.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ body: |
1111
; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 0
1212
; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 0
1313
; GCN-NEXT: renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
14-
; GCN-NEXT: renamable $sgpr1 = S_CSELECT_B32 killed renamable $sgpr2, 31744, implicit killed $scc
15-
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr1, implicit killed renamable $sgpr0
14+
; GCN-NEXT: renamable $sgpr2 = S_CMOVK_I32 31744, implicit killed $scc
15+
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2, implicit killed renamable $sgpr0
1616
%0:sgpr_32 = S_MOV_B32 0
1717
%1:sgpr_32 = S_MOV_B32 0
1818
%2:sgpr_32 = S_MOV_B32 0

0 commit comments

Comments
 (0)