From 3aeb9e4819facc7ce71bdd800071364df258bb86 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Wed, 9 Apr 2025 10:38:34 -0400 Subject: [PATCH 1/3] skip 16bit register for wwm reg sorting --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 9c737b4f3e378..8f488f5154650 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1650,7 +1650,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM // reserved registers. const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); - if (TRI->getRegSizeInBits(*RC) > 32) + if (TRI->getRegSizeInBits(*RC) != 32) continue; SortedWWMVGPRs.push_back(Reg); } From 6688c608c7e5b49db1f4fb0a38a980305ab3cc3c Mon Sep 17 00:00:00 2001 From: guochen2 Date: Mon, 14 Apr 2025 12:13:54 -0400 Subject: [PATCH 2/3] test --- .../AMDGPU/wwm-reg-shift-down-gfx11plus.mir | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir b/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir new file mode 100644 index 0000000000000..3d4361c0d27a3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir @@ -0,0 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: wwm_skip_shift_16bit_reg +tracksRegLiveness: true +machineFunctionInfo: + wwmReservedRegs: ['$vgpr0_lo16'] + isEntryFunction: false +body: | + bb.0: + liveins: $vgpr0,$vgpr1,$sgpr0 + ; GCN-LABEL: name: wwm_skip_shift_16bit_reg + ; GCN: liveins: $vgpr0, $vgpr1, 
$sgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0_lo16, $sp_reg, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 + ; GCN-NEXT: undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, $sgpr0, 0, implicit $exec + ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_DWORD_SADDR $sp_reg, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_lo16(tied-def 0) :: (load (s16) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GCN-NEXT: SI_RETURN implicit $vgpr0 + undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, $sgpr0, 0, implicit $exec + SI_RETURN implicit $vgpr0 +... + From 25ac05c419ec09a49faf13ae1e7544810a862071 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Fri, 18 Apr 2025 16:47:24 -0400 Subject: [PATCH 3/3] update test --- .../AMDGPU/wwm-reg-shift-down-gfx11plus.mir | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir b/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir index 3d4361c0d27a3..41d0029b7f1f1 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir +++ b/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir @@ -1,27 +1,28 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN %s --- -name: wwm_skip_shift_16bit_reg +name: wwm_reg_skip_sort_16bit tracksRegLiveness: true 
machineFunctionInfo: - wwmReservedRegs: ['$vgpr0_lo16'] - isEntryFunction: false + isEntryFunction: true body: | bb.0: - liveins: $vgpr0,$vgpr1,$sgpr0 - ; GCN-LABEL: name: wwm_skip_shift_16bit_reg - ; GCN: liveins: $vgpr0, $vgpr1, $sgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0_lo16, $sp_reg, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 - ; GCN-NEXT: undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, $sgpr0, 0, implicit $exec - ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_DWORD_SADDR $sp_reg, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_lo16(tied-def 0) :: (load (s16) from %stack.0, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 - ; GCN-NEXT: SI_RETURN implicit $vgpr0 - undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, $sgpr0, 0, implicit $exec - SI_RETURN implicit $vgpr0 + ; GCN-LABEL: name: wwm_reg_skip_sort_16bit + ; GCN: renamable $sgpr0 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF + ; GCN-NEXT: renamable $sgpr1 = V_READLANE_B32 $vgpr0, 31 + ; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 0 + ; GCN-NEXT: undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, 0, 0, killed $sgpr1, killed $sgpr2, 0, implicit $exec + ; GCN-NEXT: $exec_lo = EXIT_STRICT_WWM killed renamable $sgpr0 + ; GCN-NEXT: early-clobber renamable $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr1 + renamable $sgpr0 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + $vgpr0 = IMPLICIT_DEF + renamable $sgpr1 = V_READLANE_B32 $vgpr0, 31 + renamable $sgpr2 = S_MOV_B32 0 + 
undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, 0, 0, killed $sgpr1, killed $sgpr2, 0, implicit $exec + $exec_lo = EXIT_STRICT_WWM killed renamable $sgpr0 + early-clobber renamable $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + S_ENDPGM 0, implicit killed renamable $vgpr1 ... -