diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 701084844cd9b..2bb70c138a50c 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -362,6 +362,10 @@ bool SIPreEmitPeephole::mustRetainExeczBranch( if (MI.isConditionalBranch()) return true; + if (MI.isUnconditionalBranch() && + TII->getBranchDestBlock(MI) != MBB.getNextNode()) + return true; + if (MI.isMetaInstruction()) continue; diff --git a/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir b/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir new file mode 100644 index 0000000000000..f45f48434fa23 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/remove-not-short-exec-branch-on-unconditional-jump.mir @@ -0,0 +1,90 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-pre-emit-peephole %s -o - | FileCheck %s +# Do no remove S_CBRANCH_EXECZ if the following block contains an unconditional +# branch to a block other than the one immediately following it. + +--- +name: test +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; CHECK-NEXT: V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr2_sgpr3 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: liveins: $vgpr1, $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc + ; CHECK-NEXT: renamable $vgpr0 = V_CVT_F32_U32_e32 killed $vgpr1, implicit $mode, implicit $exec + ; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 + bb.0: + liveins: $vgpr0, $vgpr1 + + $sgpr0_sgpr1 = S_MOV_B64 $exec + V_CMPX_EQ_U32_nosdst_e32 0, killed $vgpr0, implicit-def $exec, implicit $exec + S_CBRANCH_EXECZ %bb.5, implicit $exec + + bb.1: + liveins: $vgpr1, $sgpr0_sgpr1 + + renamable $sgpr2_sgpr3 = IMPLICIT_DEF + renamable $sgpr4_sgpr5 = IMPLICIT_DEF + S_BRANCH %bb.2 + + bb.2: + liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 + + $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc + S_CBRANCH_EXECZ %bb.4, implicit $exec + + bb.3: + liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3 + + renamable $sgpr4_sgpr5 = IMPLICIT_DEF + S_BRANCH %bb.1 + + bb.4: + liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3 + + $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc + + bb.5: + liveins: $vgpr1, $sgpr0_sgpr1 + + $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc + renamable $vgpr0 = V_CVT_F32_U32_e32 killed $vgpr1, implicit $mode, implicit $exec + SI_RETURN_TO_EPILOG killed $vgpr0 + +...