Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,22 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
return lowerUnpackAExt(MI);
case WidenMMOToS32:
return widenMMOToS32(cast<GAnyLoad>(MI));
case WaterfallCall: {
SmallSet<Register, 4> SGPROperandRegs;
SGPROperandRegs.insert(MI.getOperand(1).getReg());

MachineBasicBlock::iterator Start(&MI);
while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
--Start;
MachineBasicBlock::iterator End(&MI);
while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
++End;
++End;
B.setInsertPt(B.getMBB(), Start);

executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs);
break;
}
}

if (!WaterfallSgprs.empty()) {
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
case _:
return true;
case PhysReg:
return true;
default:
llvm_unreachable("missing matchUniformityAndLLT");
}
Expand Down Expand Up @@ -915,6 +917,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,

addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});

addRulesForGOpcs({G_SI_CALL})
.Any({{PhysReg, UniP0}, {{None}, {SgprP0}}})
.Any({{PhysReg, DivP0}, {{None}, {VgprP0}, WaterfallCall}})
.Any({{PhysReg, UniP4}, {{None}, {SgprP4}}})
.Any({{PhysReg, DivP4}, {{None}, {VgprP4}, WaterfallCall}});

bool hasSALUFloat = ST->hasSALUFloatInsts();

addRulesForGOpcs({G_FADD}, Standard)
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ bool isAnyPtr(LLT Ty, unsigned Width);
// be checked.
enum UniformityLLTOpPredicateID {
_,
PhysReg,
// scalars
S1,
S16,
Expand Down Expand Up @@ -134,6 +135,7 @@ enum RegBankLLTMappingApplyID {
Sgpr32,
Sgpr64,
Sgpr128,
SgprP0,
SgprP1,
SgprP3,
SgprP4,
Expand Down Expand Up @@ -224,7 +226,8 @@ enum LoweringMethodID {
SplitLoad,
WidenLoad,
WidenMMOToS32,
UnpackAExt
UnpackAExt,
WaterfallCall
};

enum FastRulesTypes {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -o - %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -new-reg-bank-select -o - %s | FileCheck %s

; TODO: Could potentially insert it here
define void @arg_align_8(ptr addrspace(1) align 8 %arg0) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s

; Test that we don't insert code to pass implicit arguments we know
; the callee does not need.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s
; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s

; Workitem IDs are passed to the kernel differently for gfx908

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-new-reg-bank-select
has no impact for tests with -stop-after=irtranslator, it is later in the pipeline


; amdgpu_gfx calling convention
declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s

declare i1 @external_i1_func_void() #0
declare zeroext i1 @external_i1_zeroext_func_void() #0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s

declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -global-isel-abort=2 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
; RUN: llc -global-isel -new-reg-bank-select -global-isel-abort=2 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s

declare hidden void @external_void_func_void() #0

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s

define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; This is a copy of sibling-call.ll, but stops after the IRTranslator.

define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s
# RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-global-isel -new-reg-bank-select
has no impact on -run-pass either


# Previously this was placing the new G_CONSTANT after the use call
---
Expand Down
215 changes: 215 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize -o - %s | FileCheck %s

---
name: waterfall_divergent_call_p0_no_args
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1

; CHECK-LABEL: name: waterfall_divergent_call_p0_no_args
; CHECK: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr
; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0)
; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .4:
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
ADJCALLSTACKUP 0, 0, implicit-def $scc
%g_ptr:_(p0) = COPY $sgpr0_sgpr1
%func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0))
$sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu
ADJCALLSTACKDOWN 0, 0, implicit-def $scc
S_SETPC_B64_return undef $sgpr2_sgpr3

...

---
name: waterfall_divergent_call_p4_no_args
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1

; CHECK-LABEL: name: waterfall_divergent_call_p4_no_args
; CHECK: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr
; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4)
; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4))
; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .4:
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
ADJCALLSTACKUP 0, 0, implicit-def $scc
%g_ptr:_(p4) = COPY $sgpr0_sgpr1
%func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4))
$sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu
ADJCALLSTACKDOWN 0, 0, implicit-def $scc
S_SETPC_B64_return undef $sgpr2_sgpr3

...

---
name: waterfall_divergent_call_p0_with_args
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1

; CHECK-LABEL: name: waterfall_divergent_call_p0_with_args
; CHECK: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr
; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0)
; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .4:
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
ADJCALLSTACKUP 0, 0, implicit-def $scc
%g_ptr:_(p0) = COPY $sgpr0_sgpr1
%func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0))
$sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
ADJCALLSTACKDOWN 0, 0, implicit-def $scc
S_SETPC_B64_return undef $sgpr2_sgpr3

...

---
name: waterfall_divergent_call_p4_with_args
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1

; CHECK-LABEL: name: waterfall_divergent_call_p4_with_args
; CHECK: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr
; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4)
; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4))
; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .4:
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
ADJCALLSTACKUP 0, 0, implicit-def $scc
%g_ptr:_(p4) = COPY $sgpr0_sgpr1
%func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4))
$sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
ADJCALLSTACKDOWN 0, 0, implicit-def $scc
S_SETPC_B64_return undef $sgpr2_sgpr3

...

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx900 -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
; RUN: llc -stop-after=dead-mi-elimination -mtriple=amdgcn-- -mcpu=gfx900 -o - %s | FileCheck --check-prefixes=CHECK,DEADMI %s
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-- -mcpu=gfx900 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL
; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-- -mcpu=gfx900 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL

; CHECK-LABEL: name: basic_call
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -stop-after=irtranslator < %s | FileCheck %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdpal -mcpu=gfx1200 -stop-after=irtranslator < %s | FileCheck %s

define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
; CHECK-LABEL: name: basic_test
Expand Down