Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,22 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
return true;
}

/// Returns true if CopyMI should be considered for register
/// definition rematerialization. Otherwise, returns false.
///
/// Rematerialization can replace a source register with its value
/// from its definition. It is applied in the register coalescer,
/// after instruction selection and before register allocation.
///
/// Subtargets can override this method to classify rematerialization
/// candidates. Note that this cannot be defined in tablegen because it
/// operates at a higher level.
///
/// The default implementation accepts every candidate (always returns true).
virtual bool shouldReMaterializeTrivialRegDef(const MachineInstr *CopyMI,
const Register &DestReg,
const Register &SrcReg) const {
return true;
}

/// Re-issue the specified 'original' instruction at the
/// specific location targeting a new destination register.
/// The register in Orig->getOperand(0).getReg() will be substituted by
Expand Down
42 changes: 42 additions & 0 deletions llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,48 @@ class LLVM_ABI TargetSubtargetInfo : public MCSubtargetInfo {
return false;
}

/// Returns true if CopyMI can be lowered to a zero cycle register move.
/// Otherwise, returns false.
///
/// Lowering to zero cycle register moves depends on the microarchitecture
/// for the specific architectural registers and instructions supported.
/// Thus, currently it is applied after register allocation,
/// when `ExpandPostRAPseudos` pass calls `TargetInstrInfo::lowerCopy`
/// which in turn calls `TargetInstrInfo::copyPhysReg`.
///
/// Subtargets can override this method to classify lowering candidates.
/// Note that this cannot be defined in tablegen because it operates at
/// a higher level.
///
/// The default implementation reports that no copy can be lowered this way
/// (always returns false).
///
/// NOTE: Subtargets must maintain consistency between the logic here and
/// the logic used on lowering.
virtual bool canLowerToZeroCycleRegMove(const MachineInstr *CopyMI,
const Register &DestReg,
const Register &SrcReg) const {
return false;
}

/// Returns true if CopyMI can be lowered to a zero cycle register zeroing.
/// Otherwise, returns false.
///
/// Lowering to zero cycle register zeroing depends on the microarchitecture
/// for the specific architectural registers and instructions supported.
/// Thus, currently it takes place after register allocation,
/// when `ExpandPostRAPseudos` pass calls `TargetInstrInfo::lowerCopy`
/// which in turn calls `TargetInstrInfo::copyPhysReg`.
///
/// Subtargets can override this method to classify lowering candidates.
/// Note that this cannot be defined in tablegen because it operates at
/// a higher level.
///
/// The default implementation reports that no copy can be lowered this way
/// (always returns false).
///
/// NOTE: Subtargets must maintain consistency between the logic here and
/// the logic used on lowering.
virtual bool canLowerToZeroCycleRegZeroing(const MachineInstr *CopyMI,
const Register &DestReg,
const Register &SrcReg) const {
return false;
}

/// True if the subtarget should run MachineScheduler after aggressive
/// coalescing.
///
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ STATISTIC(numCrossRCs, "Number of cross class joins performed");
STATISTIC(numCommutes, "Number of instruction commuting performed");
STATISTIC(numExtends, "Number of copies extended");
STATISTIC(NumReMats, "Number of instructions re-materialized");
STATISTIC(NumReMatsPrevented,
"Number of instruction rematerialization prevented by "
"`shouldReMaterializeTrivialRegDef` hook");
STATISTIC(NumInflated, "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
Expand Down Expand Up @@ -1400,6 +1403,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx))
return false;

if (!TII->shouldReMaterializeTrivialRegDef(CopyMI, DstReg, SrcReg)) {
LLVM_DEBUG(dbgs() << "Remat prevented: " << CopyIdx << "\t" << *CopyMI);
++NumReMatsPrevented;
return false;
}

DebugLoc DL = CopyMI->getDebugLoc();
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Features.td
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,9 @@ def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFP
def FeatureZCRegMoveFPR32 : SubtargetFeature<"zcm-fpr32", "HasZeroCycleRegMoveFPR32", "true",
"Has zero-cycle register moves for FPR32 registers">;

// Subtarget feature "zcm-fpr128": marks zero-cycle register moves as
// available for FPR128 registers (HasZeroCycleRegMoveFPR128 = true).
def FeatureZCRegMoveFPR128 : SubtargetFeature<"zcm-fpr128", "HasZeroCycleRegMoveFPR128", "true",
"Has zero-cycle register moves for FPR128 registers">;

def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
"Has zero-cycle zeroing instructions for generic registers">;

Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,13 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
}
}

/// Decides whether the definition feeding \p CopyMI should be considered for
/// rematerialization.
///
/// Rematerialization is declined whenever the subtarget reports that the copy
/// from \p SrcReg to \p DestReg can be lowered to a zero cycle register move
/// or to a zero cycle register zeroing; in every other case it is accepted.
bool AArch64InstrInfo::shouldReMaterializeTrivialRegDef(
    const MachineInstr *CopyMI, const Register &DestReg,
    const Register &SrcReg) const {
  // A copy that can become a zero cycle move is kept as a copy.
  if (Subtarget.canLowerToZeroCycleRegMove(CopyMI, DestReg, SrcReg))
    return false;

  // Likewise for a copy that can become a zero cycle zeroing.
  return !Subtarget.canLowerToZeroCycleRegZeroing(CopyMI, DestReg, SrcReg);
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
Expand Down Expand Up @@ -5025,6 +5032,9 @@ void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
}
}

/// NOTE: must maintain consistency with
/// `AArch64Subtarget::canLowerToZeroCycleRegMove` and
/// `AArch64Subtarget::canLowerToZeroCycleRegZeroing`.
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DestReg,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {

bool isAsCheapAsAMove(const MachineInstr &MI) const override;

bool shouldReMaterializeTrivialRegDef(const MachineInstr *CopyMI,
const Register &DestReg,
const Register &SrcReg) const override;

bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
Register &DstReg, unsigned &SubIdx) const override;

Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Processors.td
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing,
FeatureZCZeroingFPWorkaround]>;

Expand All @@ -327,6 +328,7 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing]>;

def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
Expand All @@ -340,6 +342,7 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing]>;

def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
Expand All @@ -353,6 +356,7 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing]>;

def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
Expand All @@ -366,6 +370,7 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing]>;

def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
Expand All @@ -384,6 +389,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing]>;

def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
Expand All @@ -402,6 +408,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing]>;

def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
Expand All @@ -420,6 +427,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing]>;

def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
Expand All @@ -438,6 +446,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
FeatureStorePairSuppress,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing]>;

def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
Expand All @@ -455,6 +464,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
FeatureFuseLiterals,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCRegMoveFPR128,
FeatureZCZeroing
]>;

Expand Down
81 changes: 81 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -667,3 +667,84 @@ AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
bool AArch64Subtarget::enableMachinePipeliner() const {
return getSchedModel().hasInstrSchedModel();
}

bool AArch64Subtarget::isRegInClass(const MachineInstr *MI, const Register &Reg,
const TargetRegisterClass *TRC) const {
if (Reg.isPhysical()) {
return TRC->contains(Reg);
} else {
const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
return TRC->hasSubClassEq(MRI.getRegClass(Reg));
}
}

/// NOTE: must maintain consistency with `AArch64InstrInfo::copyPhysReg`.
bool AArch64Subtarget::canLowerToZeroCycleRegMove(
const MachineInstr *CopyMI, const Register &DestReg,
const Register &SrcReg) const {
if (isRegInClass(CopyMI, DestReg, &AArch64::GPR32allRegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::GPR32allRegClass) &&
DestReg != AArch64::WZR) {
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP ||
SrcReg != AArch64::WZR || !hasZeroCycleZeroingGP()) {
return hasZeroCycleRegMoveGPR64() || hasZeroCycleRegMoveGPR32();
}
return false;
}

if (isRegInClass(CopyMI, DestReg, &AArch64::GPR64allRegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::GPR64allRegClass) &&
DestReg != AArch64::XZR) {
if (DestReg == AArch64::SP || SrcReg == AArch64::SP ||
SrcReg != AArch64::XZR || !hasZeroCycleZeroingGP()) {
return hasZeroCycleRegMoveGPR64();
}
return false;
}

if (isRegInClass(CopyMI, DestReg, &AArch64::FPR128RegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::FPR128RegClass)) {
return isNeonAvailable() && hasZeroCycleRegMoveFPR128();
}

if (isRegInClass(CopyMI, DestReg, &AArch64::FPR64RegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::FPR64RegClass)) {
return hasZeroCycleRegMoveFPR64();
}

if (isRegInClass(CopyMI, DestReg, &AArch64::FPR32RegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::FPR32RegClass)) {
return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64();
}

if (isRegInClass(CopyMI, DestReg, &AArch64::FPR16RegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::FPR16RegClass)) {
return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64();
}

if (isRegInClass(CopyMI, DestReg, &AArch64::FPR8RegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::FPR8RegClass)) {
return hasZeroCycleRegMoveFPR32() || hasZeroCycleRegMoveFPR64();
}

return false;
}

/// NOTE: must maintain consistency with `AArch64InstrInfo::copyPhysReg`.
bool AArch64Subtarget::canLowerToZeroCycleRegZeroing(
const MachineInstr *CopyMI, const Register &DestReg,
const Register &SrcReg) const {
if (isRegInClass(CopyMI, DestReg, &AArch64::GPR32allRegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::GPR32allRegClass) &&
DestReg != AArch64::WZR) {
return AArch64::WZR == SrcReg && hasZeroCycleZeroingGP();
}

if (isRegInClass(CopyMI, DestReg, &AArch64::GPR64allRegClass) &&
isRegInClass(CopyMI, SrcReg, &AArch64::GPR64allRegClass) &&
DestReg != AArch64::XZR) {
return AArch64::XZR == SrcReg && hasZeroCycleZeroingGP();
}

return false;
}
13 changes: 13 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
/// Initialize properties based on the selected processor family.
void initializeProperties(bool HasMinSize);

/// Returns true if Reg is virtual and is assigned to the TRC register class
/// (or one of its sub-classes), or is physical and is a member of TRC.
/// Otherwise, returns false.
bool isRegInClass(const MachineInstr *MI, const Register &Reg,
const TargetRegisterClass *TRC) const;

public:
/// This constructor initializes the data members to match that
/// of the specified triple.
Expand Down Expand Up @@ -163,6 +169,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool enableMachinePipeliner() const override;
bool useDFAforSMS() const override { return false; }

bool canLowerToZeroCycleRegMove(const MachineInstr *CopyMI,
const Register &DestReg,
const Register &SrcReg) const override;
bool canLowerToZeroCycleRegZeroing(const MachineInstr *CopyMI,
const Register &DestReg,
const Register &SrcReg) const override;

/// Returns ARM processor family.
/// Avoid this function! CPU specifics should be kept local to this class
/// and preferably modeled with SubtargetFeatures or properties in
Expand Down
31 changes: 12 additions & 19 deletions llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,18 @@ define i32 @main() nounwind ssp {
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #96
; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
; CHECK-NEXT: mov w9, #1 ; =0x1
; CHECK-NEXT: mov w8, #2 ; =0x2
; CHECK-NEXT: stp w8, w9, [sp, #72]
; CHECK-NEXT: mov w9, #3 ; =0x3
; CHECK-NEXT: mov w8, #4 ; =0x4
; CHECK-NEXT: stp w8, w9, [sp, #64]
; CHECK-NEXT: mov w9, #5 ; =0x5
; CHECK-NEXT: mov w8, #6 ; =0x6
; CHECK-NEXT: stp w8, w9, [sp, #56]
; CHECK-NEXT: mov w9, #7 ; =0x7
; CHECK-NEXT: mov w8, #8 ; =0x8
; CHECK-NEXT: stp w8, w9, [sp, #48]
; CHECK-NEXT: mov w8, #1 ; =0x1
; CHECK-NEXT: mov w1, #2 ; =0x2
; CHECK-NEXT: stp w1, w8, [sp, #72]
; CHECK-NEXT: mov w2, #3 ; =0x3
; CHECK-NEXT: mov w3, #4 ; =0x4
; CHECK-NEXT: stp w3, w2, [sp, #64]
; CHECK-NEXT: mov w4, #5 ; =0x5
; CHECK-NEXT: mov w5, #6 ; =0x6
; CHECK-NEXT: stp w5, w4, [sp, #56]
; CHECK-NEXT: mov w6, #7 ; =0x7
; CHECK-NEXT: mov w7, #8 ; =0x8
; CHECK-NEXT: stp w7, w6, [sp, #48]
; CHECK-NEXT: mov w8, #9 ; =0x9
; CHECK-NEXT: mov w9, #10 ; =0xa
; CHECK-NEXT: stp w9, w8, [sp, #40]
Expand All @@ -86,13 +86,6 @@ define i32 @main() nounwind ssp {
; CHECK-NEXT: str x9, [sp, #8]
; CHECK-NEXT: str w8, [sp]
; CHECK-NEXT: add x0, sp, #76
; CHECK-NEXT: mov w1, #2 ; =0x2
; CHECK-NEXT: mov w2, #3 ; =0x3
; CHECK-NEXT: mov w3, #4 ; =0x4
; CHECK-NEXT: mov w4, #5 ; =0x5
; CHECK-NEXT: mov w5, #6 ; =0x6
; CHECK-NEXT: mov w6, #7 ; =0x7
; CHECK-NEXT: mov w7, #8 ; =0x8
; CHECK-NEXT: bl _fn9
; CHECK-NEXT: mov w0, #0 ; =0x0
; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
Expand Down
Loading