Skip to content

Commit 4215579

Browse files
authored
[AMDGPU] Use glue for convergence tokens at call-like operations (#86766)
The earlier implementation on AMDGPU used explicit token operands at SI_CALL and SI_CALL_ISEL. This is now replaced with CONVERGENCECTRL_GLUE operands, with the following effects:
- The treatment of tokens at call-like operations is now consistent with the treatment at intrinsics.
- Support for tail calls using implicit tokens at SI_TCRETURN "just works".
- The extra parameter at call-like instructions is eliminated, thus restoring those instructions and their handling to the original state.

The new glue node is placed after the existing glue node for the outgoing call parameters, which does not appear to interfere with selection of the call-like nodes.
1 parent 20f56e1 commit 4215579

15 files changed

+81
-140
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8612,8 +8612,6 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
86128612
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
86138613
auto *Token = Bundle->Inputs[0].get();
86148614
ConvControlToken = getValue(Token);
8615-
} else {
8616-
ConvControlToken = DAG.getUNDEF(MVT::Untyped);
86178615
}
86188616

86198617
TargetLowering::CallLoweringInfo CLI(DAG);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3833,9 +3833,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
38333833
Ops.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
38343834
}
38353835

3836-
if (!IsTailCall)
3837-
Ops.push_back(CLI.ConvergenceControlToken);
3838-
38393836
if (IsTailCall) {
38403837
// Each tail call may have to adjust the stack by a different amount, so
38413838
// this information must travel along with the operation for eventual
@@ -3862,6 +3859,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
38623859
if (InGlue.getNode())
38633860
Ops.push_back(InGlue);
38643861

3862+
// NOTE: This potentially results in *two* glue operands, and the wrong one
3863+
// might show up where the other was intended. In particular,
3864+
// Emitter::EmitMachineNode() expects only the glued convergence token if it
3865+
// exists. Similarly, the selection of the call expects to match only the
3866+
// InGlue operand if it exists.
3867+
if (SDValue Token = CLI.ConvergenceControlToken) {
3868+
Ops.push_back(SDValue(DAG.getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE,
3869+
DL, MVT::Glue, Token),
3870+
0));
3871+
}
3872+
38653873
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
38663874

38673875
// If we're doing a tail call, use a TC_RETURN here rather than an
@@ -5226,24 +5234,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
52265234
MachineInstrBuilder MIB;
52275235
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
52285236

5229-
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
5230-
MachineOperand &MO = MI.getOperand(I);
5231-
if (I != 2) {
5232-
MIB.add(MO);
5233-
continue;
5234-
}
5235-
}
5236-
5237-
MachineOperand &MO = MI.getOperand(2);
5238-
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5239-
// The token operand is always a register, whose definition is IMPLICIT_DEF
5240-
// iff there was no token on the call.
5241-
if (MachineInstr *Def = MRI.getVRegDef(MO.getReg())) {
5242-
if (Def->getOpcode() != TargetOpcode::IMPLICIT_DEF) {
5243-
MO.setImplicit();
5244-
MIB.add(MO);
5245-
}
5246-
}
5237+
for (const MachineOperand &MO : MI.operands())
5238+
MIB.add(MO);
52475239

52485240
MIB.cloneMemRefs(MI);
52495241
MI.eraseFromParent();

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -618,8 +618,8 @@ def SI_RETURN : SPseudoInstSI <
618618
// This version is only needed so we can fill in the output register
619619
// in the custom inserter.
620620
def SI_CALL_ISEL : SPseudoInstSI <
621-
(outs), (ins SSrc_b64:$src0, unknown:$callee, unknown:$token),
622-
[(AMDGPUcall i64:$src0, tglobaladdr:$callee, untyped:$token)]> {
621+
(outs), (ins SSrc_b64:$src0, unknown:$callee),
622+
[(AMDGPUcall i64:$src0, tglobaladdr:$callee)]> {
623623
let Size = 4;
624624
let isCall = 1;
625625
let SchedRW = [WriteBranch];
@@ -629,8 +629,8 @@ def SI_CALL_ISEL : SPseudoInstSI <
629629
}
630630

631631
def : GCNPat<
632-
(AMDGPUcall i64:$src0, (i64 0), untyped:$token),
633-
(SI_CALL_ISEL $src0, (i64 0), untyped:$token)
632+
(AMDGPUcall i64:$src0, (i64 0)),
633+
(SI_CALL_ISEL $src0, (i64 0))
634634
>;
635635

636636
// Wrapper around s_swappc_b64 with extra $callee parameter to track

llvm/test/CodeGen/AMDGPU/convergence-tokens.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
; CHECK-LABEL: name: basic_call
66
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
7-
; ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @foo, [[TOKEN]], csr_amdgpu, {{.*}}
7+
; ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
88
; DEADMI: {{.*}} SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
99
; GISEL: {{.*}} G_SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
1010
define i32 @basic_call(i32 %src) #0 {
@@ -92,15 +92,9 @@ define i32 @nested(i32 %src) #0 {
9292
ret i32 %sum
9393
}
9494

95-
; COM: FIXME: Tokens on tail-call have not been implemented for SelectionDAG
96-
; COM: yet; the corresponding checks have been commented out.
97-
;
9895
; CHECK-LABEL: name: tail_call_void_func_void
99-
; GISEL: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
100-
; COM: CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
101-
; COM: ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @external_void_func_void, [[TOKEN]], csr_amdgpu, {{.*}}
102-
; COM: DEADMI: {{.*}} SI_CALL {{.*}}, @external_void_func_void, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
103-
; GISEL: {{.*}} SI_TCRETURN {{.*}}, @external_void_func_void, 0, csr_amdgpu, implicit [[TOKEN]]
96+
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
97+
; CHECK: {{.*}} SI_TCRETURN {{.*}}, @external_void_func_void, 0, csr_amdgpu, {{.*}}implicit [[TOKEN]]
10498
define void @tail_call_void_func_void() #0 {
10599
%t1 = call token @llvm.experimental.convergence.entry()
106100
tail call void @external_void_func_void() [ "convergencectrl"(token %t1) ]

llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b
9292
; DAGISEL-GFX11-NEXT: $vgpr5 = COPY [[COPY2]]
9393
; DAGISEL-GFX11-NEXT: $vgpr6 = COPY [[COPY1]]
9494
; DAGISEL-GFX11-NEXT: $vgpr7 = COPY [[COPY]]
95-
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
9695
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
9796
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
9897
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -122,7 +121,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b
122121
; DAGISEL-GFX10-NEXT: $vgpr5 = COPY [[COPY2]]
123122
; DAGISEL-GFX10-NEXT: $vgpr6 = COPY [[COPY1]]
124123
; DAGISEL-GFX10-NEXT: $vgpr7 = COPY [[COPY]]
125-
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
126124
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
127125
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
128126
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -234,7 +232,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad
234232
; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]]
235233
; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]]
236234
; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]]
237-
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
238235
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
239236
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
240237
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -272,7 +269,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad
272269
; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]]
273270
; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]]
274271
; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]]
275-
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
276272
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
277273
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
278274
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -404,7 +400,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in
404400
; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY2]]
405401
; DAGISEL-GFX11-NEXT: $vgpr12 = COPY [[COPY1]]
406402
; DAGISEL-GFX11-NEXT: $vgpr13 = COPY [[COPY]]
407-
; DAGISEL-GFX11-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
408403
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13
409404
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
410405
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -454,7 +449,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in
454449
; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY2]]
455450
; DAGISEL-GFX10-NEXT: $vgpr12 = COPY [[COPY1]]
456451
; DAGISEL-GFX10-NEXT: $vgpr13 = COPY [[COPY]]
457-
; DAGISEL-GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
458452
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13
459453
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
460454
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -506,7 +500,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b)
506500
; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
507501
; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
508502
; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]]
509-
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
510503
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1
511504
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
512505
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -524,7 +517,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b)
524517
; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
525518
; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
526519
; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]]
527-
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
528520
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
529521
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
530522
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -576,7 +568,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) {
576568
; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
577569
; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
578570
; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]]
579-
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
580571
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1
581572
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
582573
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -594,7 +585,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) {
594585
; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
595586
; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
596587
; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]]
597-
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
598588
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
599589
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
600590
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -646,7 +636,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat %
646636
; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
647637
; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
648638
; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]]
649-
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
650639
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1
651640
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
652641
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -664,7 +653,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat %
664653
; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
665654
; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
666655
; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]]
667-
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
668656
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
669657
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
670658
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -716,7 +704,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) {
716704
; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
717705
; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
718706
; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]]
719-
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
720707
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1
721708
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
722709
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -734,7 +721,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) {
734721
; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
735722
; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
736723
; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]]
737-
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
738724
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
739725
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
740726
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -870,7 +856,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16
870856
; DAGISEL-GFX11-NEXT: $vgpr13 = COPY [[COPY2]]
871857
; DAGISEL-GFX11-NEXT: $vgpr14 = COPY [[COPY1]]
872858
; DAGISEL-GFX11-NEXT: $vgpr15 = COPY [[COPY]]
873-
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
874859
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
875860
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
876861
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -916,7 +901,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16
916901
; DAGISEL-GFX10-NEXT: $vgpr13 = COPY [[COPY2]]
917902
; DAGISEL-GFX10-NEXT: $vgpr14 = COPY [[COPY1]]
918903
; DAGISEL-GFX10-NEXT: $vgpr15 = COPY [[COPY]]
919-
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
920904
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
921905
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
922906
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -2480,7 +2464,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128
24802464
; DAGISEL-GFX11-NEXT: $vgpr29 = COPY [[COPY134]]
24812465
; DAGISEL-GFX11-NEXT: $vgpr30 = COPY [[COPY133]]
24822466
; DAGISEL-GFX11-NEXT: $vgpr31 = COPY [[COPY132]]
2483-
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
24842467
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
24852468
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 528, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
24862469
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -2827,7 +2810,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128
28272810
; DAGISEL-GFX10-NEXT: $vgpr29 = COPY [[COPY134]]
28282811
; DAGISEL-GFX10-NEXT: $vgpr30 = COPY [[COPY133]]
28292812
; DAGISEL-GFX10-NEXT: $vgpr31 = COPY [[COPY132]]
2830-
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
28312813
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
28322814
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 528, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
28332815
; DAGISEL-GFX10-NEXT: S_ENDPGM 0

llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
6060
; CHECK-NEXT: ; implicit-def: $sgpr15
6161
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
6262
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
63-
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
6463
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
6564
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
6665
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1

0 commit comments

Comments
 (0)