-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU][GISel] Add RegBankLegalize support for G_SI_CALL #165747
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Chinmay Deshpande (chinmaydd) ChangesPatch is 24.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165747.diff 16 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index b84c30ecaac0b..547b1181d72d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -818,6 +818,22 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
return lowerUnpackAExt(MI);
case WidenMMOToS32:
return widenMMOToS32(cast<GAnyLoad>(MI));
+ case WaterfallCall: {
+ SmallSet<Register, 4> SGPROperandRegs;
+ SGPROperandRegs.insert(MI.getOperand(1).getReg());
+
+ MachineBasicBlock::iterator Start(&MI);
+ while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
+ --Start;
+ MachineBasicBlock::iterator End(&MI);
+ while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
+ ++End;
+ ++End;
+ B.setInsertPt(B.getMBB(), Start);
+
+ executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs);
+ break;
+ }
}
if (!WaterfallSgprs.empty()) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 01abd358ff595..d697d4b7ae998 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -170,6 +170,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
case _:
return true;
+ case PhysReg:
+ return true;
default:
llvm_unreachable("missing matchUniformityAndLLT");
}
@@ -915,6 +917,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});
+ addRulesForGOpcs({G_SI_CALL})
+ .Any({{PhysReg, UniP0}, {{None}, {SgprP0}}})
+ .Any({{PhysReg, DivP0}, {{None}, {VgprP0}, WaterfallCall}})
+ .Any({{PhysReg, UniP4}, {{None}, {SgprP4}}})
+ .Any({{PhysReg, DivP4}, {{None}, {VgprP4}, WaterfallCall}});
+
bool hasSALUFloat = ST->hasSALUFloatInsts();
addRulesForGOpcs({G_FADD}, Standard)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 030bd75f8cd10..cd4d3072aa8cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -38,6 +38,7 @@ bool isAnyPtr(LLT Ty, unsigned Width);
// be checked.
enum UniformityLLTOpPredicateID {
_,
+ PhysReg,
// scalars
S1,
S16,
@@ -134,6 +135,7 @@ enum RegBankLLTMappingApplyID {
Sgpr32,
Sgpr64,
Sgpr128,
+ SgprP0,
SgprP1,
SgprP3,
SgprP4,
@@ -224,7 +226,8 @@ enum LoweringMethodID {
SplitLoad,
WidenLoad,
WidenMMOToS32,
- UnpackAExt
+ UnpackAExt,
+ WaterfallCall
};
enum FastRulesTypes {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
index 4098f643831f1..ca9d7854fb619 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -o - %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -new-reg-bank-select -o - %s | FileCheck %s
; TODO: Could potentially insert it here
define void @arg_align_8(ptr addrspace(1) align 8 %arg0) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 1bf2a589cb597..02ce52b7450f6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
; Test that we don't insert code to pass implicit arguments we know
; the callee does not need.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index d69515591ecee..2bb059a0fbb1e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s
; Workitem IDs are passed to the kernel differently for gfx908
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
index 6bfd0f060aa20..6694ad741cf48 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
; amdgpu_gfx calling convention
declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 6573088a41fc2..bb6d1f8d161cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
declare i1 @external_i1_func_void() #0
declare zeroext i1 @external_i1_zeroext_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index 070d35a99d62a..0dc9068c1984b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 4e70c15df5741..4a5a009f406fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -global-isel-abort=2 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -new-reg-bank-select -global-isel-abort=2 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
declare hidden void @external_void_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index af9bcc40dc55e..5dc6932837d79 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 7b2e3bf13c368..24e85aa280bf6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; This is a copy of sibling-call.ll, but stops after the IRTranslator.
define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir
index cbfa1c7c741b8..ed0330957ae9c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s
+# RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s
# Previously this was placing the new G_CONSTANT after the use call
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
new file mode 100644
index 0000000000000..5207d992ea74d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
@@ -0,0 +1,215 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize -o - %s | FileCheck %s
+
+---
+name: waterfall_divergent_call_p0_no_args
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: waterfall_divergent_call_p0_no_args
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .2:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0)
+ ; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
+ ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .4:
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ADJCALLSTACKUP 0, 0, implicit-def $scc
+ %g_ptr:_(p0) = COPY $sgpr0_sgpr1
+ %func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0))
+ $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ S_SETPC_B64_return undef $sgpr2_sgpr3
+
+...
+
+---
+name: waterfall_divergent_call_p4_no_args
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: waterfall_divergent_call_p4_no_args
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .2:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4)
+ ; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4))
+ ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .4:
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ADJCALLSTACKUP 0, 0, implicit-def $scc
+ %g_ptr:_(p4) = COPY $sgpr0_sgpr1
+ %func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4))
+ $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ S_SETPC_B64_return undef $sgpr2_sgpr3
+
+...
+
+---
+name: waterfall_divergent_call_p0_with_args
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: waterfall_divergent_call_p0_with_args
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .2:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0)
+ ; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
+ ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .4:
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ADJCALLSTACKUP 0, 0, implicit-def $scc
+ %g_ptr:_(p0) = COPY $sgpr0_sgpr1
+ %func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0))
+ $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ S_SETPC_B64_return undef $sgpr2_sgpr3
+
+...
+
+---
+name: waterfall_divergent_call_p4_with_args
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: waterfall_divergent_call_p4_with_args
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ;...
[truncated]
|
petar-avramovic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there any end to end .ll tests left then? Maybe add one you used to generate mir test as .ll test
| @@ -1,5 +1,5 @@ | |||
| ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | |||
| ; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s | |||
| ; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
-new-reg-bank-select
has no impact for tests with -stop-after=irtranslator, it is later in the pipeline
| @@ -1,5 +1,5 @@ | |||
| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | |||
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s | |||
| # RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
-global-isel -new-reg-bank-select
has no impact on -run-pass either
Would you not consider the |
Need some test without stop-after, you could probably use same test functions from irtransaltor tests just run them to the end to see the ISA and if we don't run into any problems. |
|
Raised #165898 for pre-committing relevant tests. |
|
#165898 was merged. But enabling those tests with |
No description provided.