Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llvm/include/llvm/CodeGen/MachineRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -729,9 +729,17 @@ class MachineRegisterInfo {
bool constrainRegAttrs(Register Reg, Register ConstrainingReg,
unsigned MinNumRegs = 0);

/// getLargestConstrainedSuperClass - Try to find a legal super-class of Reg's
/// register class that still satisfies the constraints from the instructions
/// using \p Reg. \returns the super-class TargetRegisterClass if one was
/// found, otherwise the original TargetRegisterClass.
const TargetRegisterClass *
getLargestConstrainedSuperClass(Register Reg) const;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix (or remove) the function name in the comment, line 732.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should remove the function name, this hasn't been necessary in doxygen for a long time.

Also, this should probably be named computeLargestConstrainedSuperClass; it's not a simple getter.


/// recomputeRegClass - Try to find a legal super-class of Reg's register
/// class that still satisfies the constraints from the instructions using
/// Reg. Returns true if Reg was upgraded.
/// \p Reg. \returns true if \p Reg was upgraded.
///
/// This method can be used after constraints have been removed from a
/// virtual register, for example after removing instructions or splitting
Expand Down
16 changes: 12 additions & 4 deletions llvm/lib/CodeGen/MachineRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,16 @@ MachineRegisterInfo::constrainRegAttrs(Register Reg,
return true;
}

bool
MachineRegisterInfo::recomputeRegClass(Register Reg) {
const TargetRegisterClass *
MachineRegisterInfo::getLargestConstrainedSuperClass(Register Reg) const {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC, *MF);

// Stop early if there is no room to grow.
if (NewRC == OldRC)
return false;
return NewRC;

// Accumulate constraints from all uses.
for (MachineOperand &MO : reg_nodbg_operands(Reg)) {
Expand All @@ -136,8 +136,16 @@ MachineRegisterInfo::recomputeRegClass(Register Reg) {
unsigned OpNo = &MO - &MI->getOperand(0);
NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII, TRI);
if (!NewRC || NewRC == OldRC)
return false;
return OldRC;
}
return NewRC;
}

bool MachineRegisterInfo::recomputeRegClass(Register Reg) {
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterClass *NewRC = getLargestConstrainedSuperClass(Reg);
if (NewRC == OldRC)
return false;
Comment on lines +147 to +148
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this return value actually useful anywhere?

setRegClass(Reg, NewRC);
return true;
}
Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,9 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
Flipped = true;
}

const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
const MachineFunction *MF = MI->getMF();

const MachineRegisterInfo &MRI = MF->getRegInfo();
Comment on lines +480 to +482
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably should just pass this as an argument to setRegisters

const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);

if (Dst.isPhysical()) {
Expand Down Expand Up @@ -515,6 +517,12 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
// SrcReg will be merged with a sub-register of DstReg.
SrcIdx = DstSub;
NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
if (!NewRC) {
auto SuperSrcRC = MRI.getLargestConstrainedSuperClass(Src);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No auto

if (SuperSrcRC != SrcRC) {
NewRC = TRI.getMatchingSuperRegClass(DstRC, SuperSrcRC, DstSub);
}
}
} else if (SrcSub) {
// DstReg will be merged with a sub-register of SrcReg.
DstIdx = SrcSub;
Expand Down
293 changes: 117 additions & 176 deletions llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir

Large diffs are not rendered by default.

60 changes: 30 additions & 30 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -160,22 +160,22 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
; GCN-LABEL: test_mfma_f32_32x32x16_bf16__mac:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_accvgpr_write_b32 a0, v8
; GCN-NEXT: v_accvgpr_write_b32 a1, v9
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
; GCN-NEXT: v_accvgpr_write_b32 a4, v12
; GCN-NEXT: v_accvgpr_write_b32 a5, v13
; GCN-NEXT: v_accvgpr_write_b32 a6, v14
; GCN-NEXT: v_accvgpr_write_b32 a7, v15
; GCN-NEXT: v_accvgpr_write_b32 a8, v16
; GCN-NEXT: v_accvgpr_write_b32 a9, v17
; GCN-NEXT: v_accvgpr_write_b32 a10, v18
; GCN-NEXT: v_accvgpr_write_b32 a11, v19
; GCN-NEXT: v_accvgpr_write_b32 a12, v20
; GCN-NEXT: v_accvgpr_write_b32 a13, v21
; GCN-NEXT: v_accvgpr_write_b32 a14, v22
; GCN-NEXT: v_accvgpr_write_b32 a15, v23
; GCN-NEXT: v_accvgpr_write_b32 a14, v22
; GCN-NEXT: v_accvgpr_write_b32 a13, v21
; GCN-NEXT: v_accvgpr_write_b32 a12, v20
; GCN-NEXT: v_accvgpr_write_b32 a11, v19
; GCN-NEXT: v_accvgpr_write_b32 a10, v18
; GCN-NEXT: v_accvgpr_write_b32 a9, v17
; GCN-NEXT: v_accvgpr_write_b32 a8, v16
; GCN-NEXT: v_accvgpr_write_b32 a7, v15
; GCN-NEXT: v_accvgpr_write_b32 a6, v14
; GCN-NEXT: v_accvgpr_write_b32 a5, v13
; GCN-NEXT: v_accvgpr_write_b32 a4, v12
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
; GCN-NEXT: v_accvgpr_write_b32 a1, v9
; GCN-NEXT: v_accvgpr_write_b32 a0, v8
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
; GCN-NEXT: s_nop 7
Expand Down Expand Up @@ -205,22 +205,22 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
; GCN-LABEL: test_mfma_f32_32x32x16_bf16__mac__flags:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_accvgpr_write_b32 a0, v8
; GCN-NEXT: v_accvgpr_write_b32 a1, v9
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
; GCN-NEXT: v_accvgpr_write_b32 a4, v12
; GCN-NEXT: v_accvgpr_write_b32 a5, v13
; GCN-NEXT: v_accvgpr_write_b32 a6, v14
; GCN-NEXT: v_accvgpr_write_b32 a7, v15
; GCN-NEXT: v_accvgpr_write_b32 a8, v16
; GCN-NEXT: v_accvgpr_write_b32 a9, v17
; GCN-NEXT: v_accvgpr_write_b32 a10, v18
; GCN-NEXT: v_accvgpr_write_b32 a11, v19
; GCN-NEXT: v_accvgpr_write_b32 a12, v20
; GCN-NEXT: v_accvgpr_write_b32 a13, v21
; GCN-NEXT: v_accvgpr_write_b32 a14, v22
; GCN-NEXT: v_accvgpr_write_b32 a15, v23
; GCN-NEXT: v_accvgpr_write_b32 a14, v22
; GCN-NEXT: v_accvgpr_write_b32 a13, v21
; GCN-NEXT: v_accvgpr_write_b32 a12, v20
; GCN-NEXT: v_accvgpr_write_b32 a11, v19
; GCN-NEXT: v_accvgpr_write_b32 a10, v18
; GCN-NEXT: v_accvgpr_write_b32 a9, v17
; GCN-NEXT: v_accvgpr_write_b32 a8, v16
; GCN-NEXT: v_accvgpr_write_b32 a7, v15
; GCN-NEXT: v_accvgpr_write_b32 a6, v14
; GCN-NEXT: v_accvgpr_write_b32 a5, v13
; GCN-NEXT: v_accvgpr_write_b32 a4, v12
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
; GCN-NEXT: v_accvgpr_write_b32 a1, v9
; GCN-NEXT: v_accvgpr_write_b32 a0, v8
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
; GCN-NEXT: s_nop 7
Expand Down
Loading