diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7cfff7c2f8ac0..41e24544778ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1433,7 +1433,7 @@ void GCNPassConfig::addOptimizedRegAlloc() { insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID); if (isPassEnabled(EnablePreRAOptimizations)) - insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID); + insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID); // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation // instructions that cause scheduling barriers. diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index c0a87cf4ceacf..e77f4f69e265b 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -655,8 +655,8 @@ ; GCN-O1-OPTS-NEXT: Register Coalescer ; GCN-O1-OPTS-NEXT: Rename Disconnected Subregister Components ; GCN-O1-OPTS-NEXT: Rewrite Partial Register Uses -; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations ; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler +; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations ; GCN-O1-OPTS-NEXT: SI Whole Quad Mode ; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA ; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg @@ -968,8 +968,8 @@ ; GCN-O2-NEXT: Register Coalescer ; GCN-O2-NEXT: Rename Disconnected Subregister Components ; GCN-O2-NEXT: Rewrite Partial Register Uses -; GCN-O2-NEXT: AMDGPU Pre-RA optimizations ; GCN-O2-NEXT: Machine Instruction Scheduler +; GCN-O2-NEXT: AMDGPU Pre-RA optimizations ; GCN-O2-NEXT: SI Whole Quad Mode ; GCN-O2-NEXT: SI optimize exec mask operations pre-RA ; GCN-O2-NEXT: SI Form memory clauses @@ -1295,8 +1295,8 @@ ; GCN-O3-NEXT: Register Coalescer ; GCN-O3-NEXT: Rename Disconnected Subregister Components ; GCN-O3-NEXT: Rewrite Partial Register Uses -; GCN-O3-NEXT: AMDGPU Pre-RA optimizations ; GCN-O3-NEXT: Machine Instruction Scheduler +; GCN-O3-NEXT: AMDGPU Pre-RA optimizations ; GCN-O3-NEXT: SI Whole Quad Mode ; GCN-O3-NEXT: SI optimize exec mask operations pre-RA ; GCN-O3-NEXT: SI Form memory clauses diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll index 2f82ceb37eb90..7283ec88a90d8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll @@ -76,10 +76,10 @@ define amdgpu_cs void @constant_mask_inverse_ballot(ptr addrspace(1) %out) { ; SDAG-LABEL: constant_mask_inverse_ballot: ; SDAG: ; %bb.0: ; %entry ; SDAG-NEXT: s_mov_b32 s0, 0xf8010000 -; SDAG-NEXT: s_mov_b32 s2, 0 ; SDAG-NEXT: s_mov_b32 s1, 64 -; SDAG-NEXT: v_mov_b32_e32 v3, s2 +; SDAG-NEXT: s_mov_b32 s2, 0 ; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; SDAG-NEXT: v_mov_b32_e32 v3, s2 ; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off ; SDAG-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll index 48f6beb60f01e..71d5747f5eece 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll @@ -42,10 +42,10 @@ define i32 @func_rounding() { ; GFX10-LABEL: func_rounding: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) -; GFX10-NEXT: s_lshl_b32 s6, s4, 2 +; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) ; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71 ; GFX10-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX10-NEXT: s_lshl_b32 s6, s6, 2 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 ; GFX10-NEXT: s_and_b32 s4, s4, 15 ; GFX10-NEXT: s_add_i32 s5, s4, 4 @@ -57,10 +57,10 @@ define i32 @func_rounding() { ; GFX11-LABEL: func_rounding: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) ; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 ; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_and_b32 s0, s0, 15 ; GFX11-NEXT: s_add_i32 s1, s0, 4 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll index d69aae0b73747..ca7f56d9ff345 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll @@ -38,10 +38,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_add_i32 s34, s4, -4 -; GFX10-NEXT: s_min_u32 s34, s4, s34 -; GFX10-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-NEXT: s_min_u32 s36, s4, s34 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -50,10 +50,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_add_i32 s0, s4, -4 -; GFX11-NEXT: s_min_u32 s0, s4, s0 -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-NEXT: s_min_u32 s2, s4, s0 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -124,14 +124,14 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) { ; ; GFX10-LABEL: s_set_rounding_kernel: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24 +; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_add_i32 s1, s0, -4 -; GFX10-NEXT: s_min_u32 s2, s0, s1 -; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX10-NEXT: s_add_i32 s3, s2, -4 +; GFX10-NEXT: s_min_u32 s2, s2, s3 ; GFX10-NEXT: s_lshl_b32 s2, s2, 2 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 @@ -139,14 +139,14 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) { ; ; GFX11-LABEL: s_set_rounding_kernel: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24 +; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24 +; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s2, s0, s1 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-NEXT: s_add_i32 s3, s2, -4 +; GFX11-NEXT: s_min_u32 s2, s2, s3 ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 @@ -294,10 +294,10 @@ define void @set_rounding_get_rounding() { ; GFX10-NEXT: s_cmp_lt_u32 s4, 4 ; GFX10-NEXT: s_cselect_b32 s4, s4, s5 ; GFX10-NEXT: s_add_i32 s5, s4, -4 -; GFX10-NEXT: s_min_u32 s4, s4, s5 -; GFX10-NEXT: s_lshl_b32 s6, s4, 2 +; GFX10-NEXT: s_min_u32 s6, s4, s5 ; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX10-NEXT: s_lshl_b32 s6, s6, 2 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -315,10 +315,10 @@ define void @set_rounding_get_rounding() { ; GFX11-NEXT: s_cmp_lt_u32 s0, 4 ; GFX11-NEXT: s_cselect_b32 s0, s0, s1 ; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s0, s0, s1 -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -974,10 +974,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_sext_i32_i16 s34, s4 ; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s34, s34, s35 -; GFX10-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-NEXT: s_min_u32 s36, s34, s35 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -987,10 +987,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_sext_i32_i16 s0, s4 ; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s0, s0, s1 -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1055,10 +1055,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_sext_i32_i16 s34, s4 ; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s34, s34, s35 -; GFX10-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-NEXT: s_min_u32 s36, s34, s35 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1068,10 +1068,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_sext_i32_i16 s0, s4 ; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s0, s0, s1 -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1136,10 +1136,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s34, 0xffff, s4 ; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s34, s34, s35 -; GFX10-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-NEXT: s_min_u32 s36, s34, s35 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1149,10 +1149,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_and_b32 s0, 0xffff, s4 ; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s0, s0, s1 -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1569,10 +1569,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) { ; GFX10-NEXT: v_readfirstlane_b32 s34, v0 ; GFX10-NEXT: s_lshl_b32 s34, s34, 2 ; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s34, s34, s35 -; GFX10-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-NEXT: s_min_u32 s36, s34, s35 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1586,10 +1586,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) { ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s0, s0, s1 -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1634,10 +1634,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) { ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: s_cselect_b32 s34, 3, 5 ; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s34, s34, s35 -; GFX10-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-NEXT: s_min_u32 s36, s34, s35 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1648,10 +1648,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) { ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 ; GFX11-NEXT: s_cselect_b32 s0, 3, 5 ; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s0, s0, s1 -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -1747,13 +1747,13 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() { ; GFX10-LABEL: get_rounding_after_set_rounding_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_round_mode 0x0 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) -; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_lshl_b32 s2, s0, 2 ; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) ; GFX10-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX10-NEXT: s_lshl_b32 s2, s2, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_and_b32 s0, s0, 15 ; GFX10-NEXT: s_add_i32 s1, s0, 4 ; GFX10-NEXT: s_cmp_lt_u32 s0, 4 @@ -1766,11 +1766,11 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() { ; GFX11-LABEL: get_rounding_after_set_rounding_1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_round_mode 0x0 -; GFX11-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) -; GFX11-NEXT: s_lshl_b32 s2, s0, 2 ; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) ; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX11-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_and_b32 s0, s0, 15 ; GFX11-NEXT: s_add_i32 s1, s0, 4 diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll index 4ebbb10fae187..fd62ba3f9da1f 100644 --- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll @@ -3185,8 +3185,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(p ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x7ff +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] @@ -3253,8 +3253,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(p ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x800 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] @@ -3321,8 +3321,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(p ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_movk_i32 s2, 0xfff +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] @@ -3389,8 +3389,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(p ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x1000 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] @@ -3457,8 +3457,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(p ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x1fff +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] @@ -3525,8 +3525,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(p ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x2000 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll index 0f95c0255d3ab..0aff5ca25149f 100644 --- a/llvm/test/CodeGen/AMDGPU/roundeven.ll +++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll @@ -1043,12 +1043,12 @@ define double @v_roundeven_f64(double %x) { ; SDAG_GFX6-LABEL: v_roundeven_f64: ; SDAG_GFX6: ; %bb.0: ; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG_GFX6-NEXT: s_brev_b32 s4, -2 +; SDAG_GFX6-NEXT: s_brev_b32 s6, -2 ; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0x43300000 -; SDAG_GFX6-NEXT: v_bfi_b32 v3, s4, v2, v1 +; SDAG_GFX6-NEXT: v_bfi_b32 v3, s6, v2, v1 ; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0 -; SDAG_GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3] ; SDAG_GFX6-NEXT: s_mov_b32 s4, -1 +; SDAG_GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3] ; SDAG_GFX6-NEXT: s_mov_b32 s5, 0x432fffff ; SDAG_GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3] ; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]