From 5752242a14579fcc7973226e6da712548f8dddc2 Mon Sep 17 00:00:00 2001 From: Ana Mihajlovic Date: Wed, 26 Feb 2025 18:20:55 +0100 Subject: [PATCH 1/4] merge consecutive wait_alu instructions --- .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 22 ++++++++++++++ .../AMDGPU/merge-consecutive-wait-alus.mir | 30 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index 4df55eac5d76b..bb15d12ada650 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -164,6 +164,21 @@ class AMDGPUWaitSGPRHazards { BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP)); } + unsigned mergeMasks(unsigned Mask1, unsigned Mask2) { + unsigned Mask = Mask1 & Mask2; + + Mask = AMDGPU::DepCtr::encodeFieldVmVsrc( + Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1), + AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2))); + Mask = AMDGPU::DepCtr::encodeFieldVaSdst( + Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1), + AMDGPU::DepCtr::decodeFieldVaSdst(Mask2))); + Mask = AMDGPU::DepCtr::encodeFieldVaVdst( + Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1), + AMDGPU::DepCtr::decodeFieldVaVdst(Mask2))); + return Mask; + } + bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) { enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 }; @@ -362,6 +377,13 @@ class AMDGPUWaitSGPRHazards { Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0); } if (Emit) { + if (MI != MI->getParent()->begin()) { + MachineInstr &PrevMI = *std::prev(MI); + if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) { + Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm()); + PrevMI.eraseFromParent(); + } + } auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_WAITCNT_DEPCTR)) .addImm(Mask); diff --git 
a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir new file mode 100644 index 0000000000000..0cd203e6a9bbb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir @@ -0,0 +1,30 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o - %s | FileCheck %s + + +--- +name: merge_consecutive_wait_alus +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: merge_consecutive_wait_alus + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo + ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946 + ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo + renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc + S_WAITCNT_DEPCTR 65530 + renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc +... + + +## NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +# CHECK: {{.*}} From 8d02e56fb732c151b9e136377e2fe2c6c42ad2e3 Mon Sep 17 00:00:00 2001 From: Ana Mihajlovic Date: Thu, 6 Mar 2025 15:23:34 +0100 Subject: [PATCH 2/4] skip debug instructions --- .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 29 +++++++-- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 36 +++++++++++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 18 ++++++ .../AMDGPU/merge-consecutive-wait-alus.mir | 64 ++++++++++++++++--- 4 files changed, 132 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index bb15d12ada650..43109be2c23f6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -165,8 +165,13 @@ class AMDGPUWaitSGPRHazards { } unsigned mergeMasks(unsigned Mask1, unsigned Mask2) { - unsigned Mask = Mask1 & Mask2; - + unsigned Mask = 0xffff; + Mask = AMDGPU::DepCtr::encodeFieldSaSdst( + Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1), + AMDGPU::DepCtr::decodeFieldSaSdst(Mask2))); + Mask = AMDGPU::DepCtr::encodeFieldVaVcc( + Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1), + AMDGPU::DepCtr::decodeFieldVaVcc(Mask2))); Mask = AMDGPU::DepCtr::encodeFieldVmVsrc( Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1), AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2))); @@ -176,6 +181,12 @@ class AMDGPUWaitSGPRHazards { Mask = AMDGPU::DepCtr::encodeFieldVaVdst( Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1), AMDGPU::DepCtr::decodeFieldVaVdst(Mask2))); + Mask = AMDGPU::DepCtr::encodeFieldHoldCnt( + Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1), + AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2))); + Mask = AMDGPU::DepCtr::encodeFieldVaSsrc( + Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1), + AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2))); return Mask; } @@ -377,13 +388,17 @@ class AMDGPUWaitSGPRHazards { Mask = 
AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0); } if (Emit) { - if (MI != MI->getParent()->begin()) { - MachineInstr &PrevMI = *std::prev(MI); - if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) { - Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm()); - PrevMI.eraseFromParent(); + if (MI != MBB.instr_begin()) { + MachineBasicBlock::instr_iterator It = std::prev(MI); + while (It != MBB.instr_begin() && It->isDebugInstr()) + --It; + if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) { + Mask = mergeMasks(Mask, It->getOperand(0).getImm()); + It->getOperand(0).setImm(Mask); + continue; } } + auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_WAITCNT_DEPCTR)) .addImm(Mask); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index b51cf536467b9..b166a8c206054 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -164,6 +164,18 @@ inline unsigned getSaSdstBitWidth() { return 1; } /// \returns SaSdst bit shift inline unsigned getSaSdstBitShift() { return 0; } +/// \returns VaSsrc width +inline unsigned getVaSsrcBitWidth() { return 1; } + +/// \returns VaSsrc bit shift +inline unsigned getVaSsrcBitShift() { return 8; } + +/// \returns HoldCnt width +inline unsigned getHoldCntWidth() { return 1; } + +/// \returns HoldCnt bit shift +inline unsigned getHoldCntBitShift() { return 7; } + } // end anonymous namespace namespace llvm { @@ -1740,6 +1752,14 @@ unsigned decodeFieldVaVcc(unsigned Encoded) { return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth()); } +unsigned decodeFieldVaSsrc(unsigned Encoded) { + return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth()); +} + +unsigned decodeFieldHoldCnt(unsigned Encoded) { + return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth()); +} + unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), 
getVmVsrcBitWidth()); } @@ -1780,6 +1800,22 @@ unsigned encodeFieldVaVcc(unsigned VaVcc) { return encodeFieldVaVcc(0xffff, VaVcc); } +unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) { + return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth()); +} + +unsigned encodeFieldVaSsrc(unsigned VaSsrc) { + return encodeFieldVaSsrc(0xfff, VaSsrc); +} + +unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) { + return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth()); +} + +unsigned encodeFieldHoldCnt(unsigned HoldCnt) { + return encodeFieldHoldCnt(0xfff, HoldCnt); +} + } // namespace DepCtr //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f54d5a273ca37..184f40bccfff8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1180,6 +1180,12 @@ unsigned decodeFieldVaSdst(unsigned Encoded); /// \returns Decoded VaVcc from given immediate \p Encoded. unsigned decodeFieldVaVcc(unsigned Encoded); +/// \returns Decoded VaSsrc from given immediate \p Encoded. +unsigned decodeFieldVaSsrc(unsigned Encoded); + +/// \returns Decoded HoldCnt from given immediate \p Encoded. +unsigned decodeFieldHoldCnt(unsigned Encoded); + /// \returns \p VmVsrc as an encoded Depctr immediate. unsigned encodeFieldVmVsrc(unsigned VmVsrc); @@ -1210,6 +1216,18 @@ unsigned encodeFieldVaVcc(unsigned VaVcc); /// \returns \p Encoded combined with encoded \p VaVcc. unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc); +/// \returns \p HoldCnt as an encoded Depctr immediate. +unsigned encodeFieldHoldCnt(unsigned HoldCnt); + +/// \returns \p Encoded combined with encoded \p HoldCnt. +unsigned encodeFieldHoldCnt(unsigned HoldCnt, unsigned Encoded); + +/// \returns \p VaSsrc as an encoded Depctr immediate. 
+unsigned encodeFieldVaSsrc(unsigned VaSsrc); + +/// \returns \p Encoded combined with encoded \p VaSsrc. +unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc); + } // namespace DepCtr namespace Exp { diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir index 0cd203e6a9bbb..13d0290dcac1d 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir @@ -4,12 +4,6 @@ --- name: merge_consecutive_wait_alus -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true body: | bb.0: liveins: $vgpr0 @@ -24,7 +18,61 @@ body: | S_WAITCNT_DEPCTR 65530 renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc ... +--- +name: merge_consecutive_wait_alus_two_bb +body: | + ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo + ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_WAITCNT_DEPCTR 61951 + ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo + bb.0: + liveins: $vgpr0 + + renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc + S_WAITCNT_DEPCTR 65530 + bb.1: + liveins: $sgpr0 -## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -# CHECK: {{.*}} + renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc +... 
+--- +name: meta_instructions +machineFunctionInfo: +body: | + bb.0: + ; CHECK-LABEL: name: meta_instructions + ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo + ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530 + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: S_WAITCNT_DEPCTR 61951 + ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo + renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc + S_WAITCNT_DEPCTR 65530 + SCHED_BARRIER 0 + renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc +... +--- +name: debug_instruction +machineFunctionInfo: +body: | + bb.0: + ; CHECK-LABEL: name: debug_instruction + ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo + ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946 + ; CHECK-NEXT: DBG_VALUE $sgpr0 + ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo + renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc + S_WAITCNT_DEPCTR 65530 + DBG_VALUE $sgpr0 + renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc +... 
From f093ec6f3dc35218c9669347f19edc9064348622 Mon Sep 17 00:00:00 2001 From: Ana Mihajlovic Date: Fri, 7 Mar 2025 11:21:17 +0100 Subject: [PATCH 3/4] removed skipping hazard state update --- .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index 43109be2c23f6..527e6bf4e2211 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -190,6 +190,21 @@ class AMDGPUWaitSGPRHazards { return Mask; } + bool mergeSubsequentWaitAlus(MachineBasicBlock::instr_iterator &MI, + unsigned Mask) { + auto MBB = MI->getParent(); + if (MI != MBB->instr_begin()) { + MachineBasicBlock::instr_iterator It = std::prev(MI); + while (It != MBB->instr_begin() && It->isDebugInstr()) + --It; + if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) { + It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm())); + return true; + } + } + return false; + } + bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) { enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 }; @@ -388,21 +403,12 @@ class AMDGPUWaitSGPRHazards { Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0); } if (Emit) { - if (MI != MBB.instr_begin()) { - MachineBasicBlock::instr_iterator It = std::prev(MI); - while (It != MBB.instr_begin() && It->isDebugInstr()) - --It; - if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) { - Mask = mergeMasks(Mask, It->getOperand(0).getImm()); - It->getOperand(0).setImm(Mask); - continue; - } + if (!mergeSubsequentWaitAlus(MI, Mask)) { + auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(), + TII->get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(Mask); + updateGetPCBundle(NewMI); } - - auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(), - TII->get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(Mask); - updateGetPCBundle(NewMI); Emitted = true; } } From 
bba8686a76c83b36b865d5364a728c58873599dd Mon Sep 17 00:00:00 2001 From: Ana Mihajlovic Date: Tue, 11 Mar 2025 10:42:24 +0100 Subject: [PATCH 4/4] style changes, small fix --- .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 25 +++++++++---------- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 +-- .../AMDGPU/merge-consecutive-wait-alus.mir | 1 + 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index 527e6bf4e2211..bfdd8cf1bc2b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -190,19 +190,18 @@ class AMDGPUWaitSGPRHazards { return Mask; } - bool mergeSubsequentWaitAlus(MachineBasicBlock::instr_iterator &MI, - unsigned Mask) { + bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI, + unsigned Mask) { auto MBB = MI->getParent(); - if (MI != MBB->instr_begin()) { - MachineBasicBlock::instr_iterator It = std::prev(MI); - while (It != MBB->instr_begin() && It->isDebugInstr()) - --It; - if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) { - It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm())); - return true; - } - } - return false; + if (MI == MBB->instr_begin()) + return false; + + auto It = prev_nodbg(MI, MBB->instr_begin()); + if (It->getOpcode() != AMDGPU::S_WAITCNT_DEPCTR) + return false; + + It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm())); + return true; } bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) { @@ -403,7 +402,7 @@ class AMDGPUWaitSGPRHazards { Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0); } if (Emit) { - if (!mergeSubsequentWaitAlus(MI, Mask)) { + if (!mergeConsecutiveWaitAlus(MI, Mask)) { auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_WAITCNT_DEPCTR)) .addImm(Mask); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 
b166a8c206054..ac6b07bad3e35 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1805,7 +1805,7 @@ unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) { } unsigned encodeFieldVaSsrc(unsigned VaSsrc) { - return encodeFieldVaSsrc(0xfff, VaSsrc); + return encodeFieldVaSsrc(0xffff, VaSsrc); } unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) { @@ -1813,7 +1813,7 @@ unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) { } unsigned encodeFieldHoldCnt(unsigned HoldCnt) { - return encodeFieldHoldCnt(0xfff, HoldCnt); + return encodeFieldHoldCnt(0xffff, HoldCnt); } } // namespace DepCtr diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir index 13d0290dcac1d..d8f4c9c8f14b5 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir @@ -76,3 +76,4 @@ body: | DBG_VALUE $sgpr0 renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc ... +