diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 351e9f25e29cf..ab62e530a18d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -230,13 +230,6 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler { return AddrReg.getReg(0); } - void assignValueToReg(Register ValVReg, Register PhysReg, - const CCValAssign &VA) override { - MIB.addUse(PhysReg, RegState::Implicit); - Register ExtReg = extendRegisterMin32(*this, ValVReg, VA); - MIRBuilder.buildCopy(PhysReg, ExtReg); - } - void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO, const CCValAssign &VA) override { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 52ca38aca5c77..059b415b75ff1 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3855,10 +3855,14 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned ArgIdx = 0; for (auto [Reg, Val] : RegsToPass) { - if (ArgIdx++ >= NumSpecialInputs && !Val->isDivergent() && - TRI->isSGPRPhysReg(Reg)) { - // Speculatively insert a readfirstlane in case this is a uniform value in - // a VGPR. + if (ArgIdx++ >= NumSpecialInputs && + (IsChainCallConv || !Val->isDivergent()) && TRI->isSGPRPhysReg(Reg)) { + // For chain calls, the inreg arguments are required to be + // uniform. Speculatively Insert a readfirstlane in case we cannot prove + // they are uniform. + // + // For other calls, if an inreg arguments is known to be uniform, + // speculatively insert a readfirstlane in case it is in a VGPR. // // FIXME: We need to execute this in a waterfall loop if it is a divergent // value, so let that continue to produce invalid code. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll index 3438cbdd476d8..4b0ff1b2eb470 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll @@ -24,9 +24,12 @@ define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -50,9 +53,12 @@ define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -82,9 +88,12 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -108,9 +117,12 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index 5effd24a75208..adad38de380d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -50,7 +50,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -99,8 +100,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index c3694158e7b97..96c3575e3190c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -942,7 +942,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -3984,8 +3985,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -5309,7 +5312,8 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5354,7 +5358,8 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5402,8 +5407,10 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5451,8 +5458,10 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5499,7 +5508,8 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5546,7 +5556,8 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5591,7 +5602,8 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5639,8 +5651,10 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5685,7 +5699,9 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5738,8 +5754,12 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $sgpr1 = COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5787,8 +5807,12 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5836,8 +5860,10 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p0) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5885,8 +5911,10 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5931,7 +5959,9 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](p3) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY9]](p3) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5983,10 +6013,14 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p1>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $sgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; CHECK-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32) + ; CHECK-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) @@ -6034,8 +6068,10 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p5>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll index 06f66e05d6747..8ca3e8255b634 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll @@ -501,6 +501,79 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { unreachable } +; Chain call with SGPR arguments that we cannot prove are uniform. +define amdgpu_cs void @cs_to_chain_nonuniform(<3 x i32> %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: cs_to_chain_nonuniform: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5 +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: cs_to_chain_nonuniform: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; GISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5 +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s0 +; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: cs_to_chain_nonuniform: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4 +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5 +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: cs_to_chain_nonuniform: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5 +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s0 +; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { ; GISEL-GFX11-LABEL: chain_to_chain: ; GISEL-GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll index 469d0453b9dfb..75616d276754c 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll @@ -20,9 +20,15 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -30,8 +36,8 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -44,20 +50,26 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -136,9 +148,15 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -146,8 +164,8 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -160,20 +178,26 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -252,9 +276,15 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -262,8 +292,8 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -276,20 +306,26 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -368,9 +404,15 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -378,8 +420,8 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -392,20 +434,26 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -487,9 +535,15 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -510,15 +564,21 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect @@ -613,9 +673,15 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY6]] @@ -623,8 +689,8 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: non_imm_exec ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -638,20 +704,26 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY7]] - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY9]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY12]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: non_imm_exec ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -734,9 +806,15 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY4]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY7]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY8]] @@ -758,15 +836,21 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY4]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY8]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY9]] - ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY13]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, [[COPY2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll index 51c28a02b7f82..6deac9f55f320 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll @@ -20,9 +20,15 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -30,8 +36,8 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -44,20 +50,26 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -136,9 +148,15 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -146,8 +164,8 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -160,20 +178,26 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -252,9 +276,15 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -262,8 +292,8 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -276,20 +306,26 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -368,9 +404,15 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -378,8 +420,8 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -392,20 +434,26 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -487,9 +535,15 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -510,15 +564,21 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect @@ -615,9 +675,15 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -625,8 +691,8 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY9]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY12]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: non_imm_exec ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -642,20 +708,26 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY13]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: non_imm_exec ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -744,9 +816,15 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 i ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY4]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY7]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY8]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY9]] @@ -770,15 +848,21 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 i ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY4]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY8]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY9]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY10]] - ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]] + ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY14]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, [[REG_SEQUENCE1]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec