Skip to content

Commit f70f3ba

Browse files
committed
Fix the test failure in this PR; update AMDGPU code that did not set the earlyclobber flag
1 parent ec793aa commit f70f3ba

14 files changed

+5118
-5053
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,13 @@ bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
116116
if (!DstRC || DstRC != SrcRC)
117117
return false;
118118

119-
return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
120-
RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
119+
auto result = RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
120+
RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
121+
const MCInstrDesc &MCID = MI.getDesc();
122+
if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1) {
123+
MI.getOperand(0).setIsEarlyClobber(true);
124+
}
125+
return result;
121126
}
122127

123128
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
@@ -593,6 +598,7 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
593598
I.setDesc(TII.get(Opc));
594599
I.addOperand(*MF, MachineOperand::CreateImm(0));
595600
I.addImplicitDefUseOperands(*MF);
601+
I.getOperand(0).setIsEarlyClobber(true);
596602
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
597603
}
598604

@@ -3795,6 +3801,10 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
37953801
MI.removeOperand(1); // Intrinsic ID
37963802
MI.addOperand(VDst_In); // Readd VDst_In to the end
37973803
MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
3804+
const MCInstrDesc &MCID = MI.getDesc();
3805+
if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1) {
3806+
MI.getOperand(0).setIsEarlyClobber(true);
3807+
}
37983808
return true;
37993809
}
38003810

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,10 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
687687
if (!TII->isOperandLegal(*MI, OpNo, &New))
688688
return false;
689689

690+
const MCInstrDesc &MCID = MI->getDesc();
691+
if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1) {
692+
MI->getOperand(0).setIsEarlyClobber(true);
693+
}
690694
Old.ChangeToImmediate(*ImmVal);
691695
return true;
692696
}

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ define amdgpu_kernel void @set_inactive_imm_poison(ptr addrspace(1) %out) {
2727
; GCN: ; %bb.0:
2828
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
2929
; GCN-NEXT: v_mov_b32_e32 v0, 1
30-
; GCN-NEXT: v_mov_b32_e32 v0, v0
30+
; GCN-NEXT: v_mov_b32_e32 v1, v0
3131
; GCN-NEXT: s_mov_b32 s2, -1
3232
; GCN-NEXT: s_mov_b32 s3, 0xf000
3333
; GCN-NEXT: s_waitcnt lgkmcnt(0)
34-
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
34+
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
3535
; GCN-NEXT: s_endpgm
3636
%tmp.0 = call i32 @llvm.amdgcn.set.inactive.i32(i32 1, i32 poison) #0
3737
%tmp = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp.0)
@@ -68,12 +68,12 @@ define amdgpu_kernel void @set_inactive_imm_poison_64(ptr addrspace(1) %out) {
6868
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
6969
; GCN-NEXT: v_mov_b32_e32 v0, 1
7070
; GCN-NEXT: v_mov_b32_e32 v1, 0
71-
; GCN-NEXT: v_mov_b32_e32 v0, v0
72-
; GCN-NEXT: v_mov_b32_e32 v1, v1
71+
; GCN-NEXT: v_mov_b32_e32 v2, v0
72+
; GCN-NEXT: v_mov_b32_e32 v3, v1
7373
; GCN-NEXT: s_mov_b32 s2, -1
7474
; GCN-NEXT: s_mov_b32 s3, 0xf000
7575
; GCN-NEXT: s_waitcnt lgkmcnt(0)
76-
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
76+
; GCN-NEXT: buffer_store_dwordx2 v[2:3], off, s[0:3], 0
7777
; GCN-NEXT: s_endpgm
7878
%tmp.0 = call i64 @llvm.amdgcn.set.inactive.i64(i64 1, i64 poison) #0
7979
%tmp = call i64 @llvm.amdgcn.strict.wwm.i64(i64 %tmp.0)

llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,10 @@ define amdgpu_kernel void @v_mul_i64_zext_src0_src1(ptr addrspace(1) %out, ptr a
165165
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
166166
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
167167
; GFX10-NEXT: s_clause 0x1
168-
; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
169-
; GFX10-NEXT: global_load_dword v2, v0, s[6:7]
168+
; GFX10-NEXT: global_load_dword v2, v0, s[2:3]
169+
; GFX10-NEXT: global_load_dword v3, v0, s[6:7]
170170
; GFX10-NEXT: s_waitcnt vmcnt(0)
171-
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v1, v2, 0
171+
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v2, v3, 0
172172
; GFX10-NEXT: v_mov_b32_e32 v2, 0
173173
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
174174
; GFX10-NEXT: s_endpgm
@@ -179,15 +179,15 @@ define amdgpu_kernel void @v_mul_i64_zext_src0_src1(ptr addrspace(1) %out, ptr a
179179
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
180180
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
181181
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
182-
; GFX11-NEXT: v_mov_b32_e32 v2, 0
183-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
182+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
184183
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
185184
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
186185
; GFX11-NEXT: s_clause 0x1
187-
; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
188-
; GFX11-NEXT: global_load_b32 v0, v0, s[4:5]
186+
; GFX11-NEXT: global_load_b32 v2, v0, s[2:3]
187+
; GFX11-NEXT: global_load_b32 v3, v0, s[4:5]
189188
; GFX11-NEXT: s_waitcnt vmcnt(0)
190-
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, 0
189+
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, v3, 0
190+
; GFX11-NEXT: v_mov_b32_e32 v2, 0
191191
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
192192
; GFX11-NEXT: s_endpgm
193193
%tid = call i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)