From 62d9aed7214ffd3861d769ce59abd76c27799553 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sat, 12 Oct 2024 23:58:25 -0400 Subject: [PATCH] [AMDGPU] Skip non-first terminators when forcing emit zero flag --- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 4 ++- .../waitcnt-debug-non-first-terminators.mir | 33 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 0762bcf4353df..016f7c325d644 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1824,7 +1824,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, // Verify that the wait is actually needed. ScoreBrackets.simplifyWaitcnt(Wait); - if (ForceEmitZeroFlag) + // When forcing emit, we need to skip terminators because that would break the + // terminators of the MBB if we emit a waitcnt between terminators. 
+ if (ForceEmitZeroFlag && !MI.isTerminator()) Wait = WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false); if (ForceEmitWaitcnt[LOAD_CNT]) diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir new file mode 100644 index 0000000000000..bccf266117a7d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir @@ -0,0 +1,33 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 %s -o - | FileCheck %s + +--- +name: waitcnt-debug-non-first-terminators +liveins: +machineFunctionInfo: + isEntryFunction: true +body: | + ; CHECK-LABEL: name: waitcnt-debug-non-first-terminators + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; CHECK-NEXT: S_BRANCH %bb.2, implicit $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_WAITCNT 0 + ; CHECK-NEXT: S_NOP 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_WAITCNT 0 + ; CHECK-NEXT: S_NOP 0 + bb.0: + S_CBRANCH_SCC1 %bb.1, implicit $scc + S_BRANCH %bb.2, implicit $scc + bb.1: + S_NOP 0 + bb.2: + S_NOP 0 +...