Skip to content

Commit cfc74dd

Browse files
authored
AMDGPU: Constrain readfirstlane operand when writing to m0 (#168004)
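Fixes another verifier error that appeared after AV registers were introduced. Also fixes a bug where the subregister index on the copy's source operand, if there was one, was not cleared after the operand was rewritten to use the readfirstlane result.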
1 parent 1122581 commit cfc74dd

File tree

3 files changed

+53
-5
lines changed

3 files changed

+53
-5
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -902,14 +902,28 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
902902
// really much we can do to fix this.
903903
// Some special instructions use M0 as an input. Some even only use
904904
// the first lane. Insert a readfirstlane and hope for the best.
905-
if (DstReg == AMDGPU::M0 &&
906-
TRI->hasVectorRegisters(MRI->getRegClass(SrcReg))) {
905+
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
906+
if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
907907
Register TmpReg =
908908
MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
909-
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
910-
TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
909+
910+
const MCInstrDesc &ReadFirstLaneDesc =
911+
TII->get(AMDGPU::V_READFIRSTLANE_B32);
912+
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), ReadFirstLaneDesc, TmpReg)
911913
.add(MI.getOperand(1));
914+
915+
unsigned SubReg = MI.getOperand(1).getSubReg();
912916
MI.getOperand(1).setReg(TmpReg);
917+
MI.getOperand(1).setSubReg(AMDGPU::NoSubRegister);
918+
919+
const TargetRegisterClass *OpRC = TII->getRegClass(ReadFirstLaneDesc, 1);
920+
const TargetRegisterClass *ConstrainRC =
921+
SubReg == AMDGPU::NoSubRegister
922+
? OpRC
923+
: TRI->getMatchingSuperRegClass(SrcRC, OpRC, SubReg);
924+
925+
if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
926+
llvm_unreachable("failed to constrain register");
913927
} else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
914928
MI, MI.getDebugLoc())) {
915929
I = std::next(I);

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,19 @@ bb16: ; preds = %bb16, %bb
4949
br label %bb16
5050
}
5151

52-
52+
define void @av_class_to_m0(ptr addrspace(1) %ptr) {
53+
; CHECK-LABEL: av_class_to_m0:
54+
; CHECK: ; %bb.0:
55+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56+
; CHECK-NEXT: global_load_dword v0, v[0:1], off
57+
; CHECK-NEXT: s_waitcnt vmcnt(0)
58+
; CHECK-NEXT: v_readfirstlane_b32 s4, v0
59+
; CHECK-NEXT: s_mov_b32 m0, s4
60+
; CHECK-NEXT: ;;#ASMSTART
61+
; CHECK-NEXT: ; use m0
62+
; CHECK-NEXT: ;;#ASMEND
63+
; CHECK-NEXT: s_setpc_b64 s[30:31]
64+
%load = load i32, ptr addrspace(1) %ptr
65+
call void asm sideeffect "; use $0", "{m0}"(i32 %load)
66+
ret void
67+
}

llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,22 @@ body: |
9090
S_ENDPGM 0
9191
...
9292

93+
---
94+
name: constrain_readfirstlane_av64_subreg_m0
95+
tracksRegLiveness: true
96+
body: |
97+
bb.0:
98+
liveins: $vgpr0_vgpr1
99+
100+
; CHECK-LABEL: name: constrain_readfirstlane_av64_subreg_m0
101+
; CHECK: liveins: $vgpr0_vgpr1
102+
; CHECK-NEXT: {{ $}}
103+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
104+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
105+
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]].sub0, implicit $exec
106+
; CHECK-NEXT: $m0 = COPY [[V_READFIRSTLANE_B32_]]
107+
%0:sreg_32 = IMPLICIT_DEF
108+
%1:av_64 = COPY $vgpr0_vgpr1
109+
$m0 = COPY %1.sub0
110+
...
111+

0 commit comments

Comments
 (0)