From f3971129f5d5957be46faad42c34b50716d002a1 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 30 May 2025 11:42:35 +0100 Subject: [PATCH 1/3] precommit tests --- .../CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll | 38 +++++++++++++++++++ .../CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir | 26 +++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll create mode 100644 llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll new file mode 100644 index 0000000000000..59a62e9900623 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s + +define amdgpu_gs i32 @main() { +; CHECK-LABEL: main: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_bitcmp1_b32 0, 0 +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: s_cselect_b32 s1, -1, 0 +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1 +; CHECK-NEXT: v_readfirstlane_b32 s1, v0 +; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_bitcmp1_b32 s0, 0 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_xor_b32 s0, s0, -1 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: s_wait_alu 0xf1ff +; CHECK-NEXT: ; return to shader part epilog +bb: + %i = call i1 @llvm.amdgcn.readfirstlane.i1(i1 false) + br label %bb1 + +bb1: + %i2 = zext i1 %i to i32 + %i3 = call i32 @llvm.amdgcn.wwm.i32(i32 0) + %i4 = call i32 @llvm.amdgcn.wwm.i32(i32 %i2) + %i5 = trunc i32 %i4 to i1 + %i6 = trunc i32 %i3 to i1 + %i7 = or i1 %i6, %i5 + %i8 = select i1 %i7, i32 0, i32 1 + ret i32 %i8 +} diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir new file mode 100644 index 0000000000000..b4e407bbcc158 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=si-fix-sgpr-copies %s | FileCheck %s + +--- +name: main +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: main + ; CHECK: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[DEF1]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed undef [[DEF]], killed undef [[V_READFIRSTLANE_B32_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed undef [[S_OR_B32_]], implicit-def dead $scc + ; CHECK-NEXT: S_CMP_EQ_U32 killed undef [[S_AND_B32_]], 1, implicit-def $scc + ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 killed undef [[S_AND_B32_]], killed undef [[S_AND_B32_]], implicit-def dead $scc + ; CHECK-NEXT: SI_RETURN_TO_EPILOG undef $sgpr0 + %0:sreg_32 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + early-clobber %2:sreg_32 = STRICT_WWM killed undef %1, implicit $exec + %3:sreg_32 = S_OR_B32 killed undef %0, killed undef %2, implicit-def dead $scc + %4:sreg_32 = S_AND_B32 1, killed undef %3, implicit-def dead $scc + S_CMP_EQ_U32 killed undef %4, 1, implicit-def $scc + %5:sreg_32_xm0_xexec = S_XOR_B32 killed undef %4, killed undef %4, implicit-def dead $scc + SI_RETURN_TO_EPILOG undef $sgpr0 +... From 9df6331b9f3cd924b93fea845a50f03932506dff Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 30 May 2025 11:41:57 +0100 Subject: [PATCH 2/3] [AMDGPU] Fix SIFixSGPRCopies handling of STRICT_WWM and friends SIFixSGPRCopies handled STRICT_WWM (and similar WWM/WQM pseudos) like a COPY. In particular, if the source was a VGPR and the result was an SGPR, lowerVGPR2SGPRCopies would replace it with a readfirstlane, erasing the original pseudo and hence sabotaging the WWM region marking which is supposed to be performed by SIWholeQuadMode. Fix this by handling it more like INSERT_SUBREG, PHI and REG_SEQUENCE: if the source is a VGPR then move the result to a VGPR, and keep the pseudo. --- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 10 +++++----- llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll | 8 +++++--- llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir | 5 +++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 2cf00b4e5cc66..1bf5b4a241780 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -634,11 +634,7 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) { switch (MI.getOpcode()) { default: continue; - case AMDGPU::COPY: - case AMDGPU::WQM: - case AMDGPU::STRICT_WQM: - case AMDGPU::SOFT_WQM: - case AMDGPU::STRICT_WWM: { + case AMDGPU::COPY: { const TargetRegisterClass *SrcRC, *DstRC; std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI); @@ -662,6 +658,10 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) { break; } + case AMDGPU::WQM: + case AMDGPU::STRICT_WQM: + case AMDGPU::SOFT_WQM: + case AMDGPU::STRICT_WWM: case AMDGPU::INSERT_SUBREG: case AMDGPU::PHI: case AMDGPU::REG_SEQUENCE: { diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll index 59a62e9900623..db32135939a5d 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll @@ -7,9 +7,11 @@ define amdgpu_gs i32 @main() { ; CHECK-NEXT: s_bitcmp1_b32 0, 0 ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: s_or_saveexec_b32 s2, -1 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; CHECK-NEXT: v_readfirstlane_b32 s1, v0 +; CHECK-NEXT: s_mov_b32 exec_lo, s2 ; CHECK-NEXT: s_or_b32 s0, s0, s1 ; CHECK-NEXT: s_wait_alu 0xfffe ; CHECK-NEXT: s_bitcmp1_b32 s0, 0 @@ -17,9 +19,9 @@ define amdgpu_gs i32 @main() { ; CHECK-NEXT: s_wait_alu 0xfffe ; CHECK-NEXT: s_xor_b32 s0, s0, -1 ; CHECK-NEXT: s_wait_alu 0xfffe -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: v_readfirstlane_b32 s0, v1 ; CHECK-NEXT: s_wait_alu 0xf1ff ; CHECK-NEXT: ; return to shader part epilog bb: diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir index b4e407bbcc158..7c0d5ed205ec0 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir @@ -9,8 +9,9 @@ body: | ; CHECK-LABEL: name: main ; CHECK: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[DEF1]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed undef [[DEF]], killed undef [[V_READFIRSTLANE_B32_]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[DEF1]], implicit $exec + ; CHECK-NEXT: early-clobber %2:sreg_32 = STRICT_WWM killed undef [[V_READFIRSTLANE_B32_]], implicit $exec + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed undef [[DEF]], killed undef %2, implicit-def dead $scc ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed undef [[S_OR_B32_]], implicit-def dead $scc ; CHECK-NEXT: S_CMP_EQ_U32 killed undef [[S_AND_B32_]], 1, implicit-def $scc ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 killed undef [[S_AND_B32_]], killed undef [[S_AND_B32_]], implicit-def dead $scc From afba4e25dc52124ca4c03bfe378d587d4f570689 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 30 May 2025 13:55:40 +0100 Subject: [PATCH 3/3] Fix RUN line --- llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir index 7c0d5ed205ec0..869627f262b33 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=si-fix-sgpr-copies %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=si-fix-sgpr-copies %s -o - | FileCheck %s --- name: main