Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/ARM/ARM.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMBranchTargetsPass();
FunctionPass *createARMDeadRegisterDefinitions();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
Expand All @@ -66,6 +67,7 @@ void initializeARMBlockPlacementPass(PassRegistry &);
void initializeARMBranchTargetsPass(PassRegistry &);
void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMDAGToDAGISelLegacyPass(PassRegistry &);
void initializeARMDeadRegisterDefinitionsPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
void initializeARMLoadStoreOptPass(PassRegistry &);
Expand Down
229 changes: 229 additions & 0 deletions llvm/lib/Target/ARM/ARMDeadRegisterDefinitionsPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
//===- ARMDeadRegisterDefinitionsPass.cpp - Convert dead defs to compares -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// Replace flag-setting ALU instructions whose register result is dead with
/// the equivalent compare/test instruction (CMP, CMN, TST, TEQ), when the
/// instruction has such a form.
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
using namespace llvm;

#define DEBUG_TYPE "arm-dead-defs-to-cmp"

STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");

#define ARM_DEAD_REG_DEF_NAME "ARM Convert dead defs to compares"

namespace {
/// Machine-function pass that rewrites flag-setting ALU instructions whose
/// register result is dead into the corresponding compare/test instruction.
class ARMDeadRegisterDefinitions : public MachineFunctionPass {
private:
  // Per-function state, (re)assigned at the start of runOnMachineFunction().
  // Default-initialised so a freshly constructed pass never holds
  // indeterminate pointers or an indeterminate flag.
  const TargetRegisterInfo *TRI = nullptr;
  const MachineRegisterInfo *MRI = nullptr;
  const TargetInstrInfo *TII = nullptr;

  // Set to true as soon as one instruction of the current function has been
  // replaced; this is the value runOnMachineFunction() returns.
  bool Changed = false;

  /// Scan \p MBB and perform the replacement on every eligible instruction.
  void processMachineBasicBlock(MachineBasicBlock &MBB);

public:
  static char ID; // Pass identification, replacement for typeid.
  ARMDeadRegisterDefinitions() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &F) override;

  StringRef getPassName() const override { return ARM_DEAD_REG_DEF_NAME; }

  /// Instructions are only replaced in place inside their block, so the pass
  /// declares the CFG as preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
char ARMDeadRegisterDefinitions::ID = 0;
} // end anonymous namespace

// Pass registration boilerplate. The argument string is the same text as
// DEBUG_TYPE and the description is ARM_DEAD_REG_DEF_NAME, which is also what
// getPassName() returns.
INITIALIZE_PASS(ARMDeadRegisterDefinitions, "arm-dead-defs-to-cmp",
ARM_DEAD_REG_DEF_NAME, false, false)

static bool usesFrameIndex(const MachineInstr &MI) {
for (const MachineOperand &MO : MI.uses())
if (MO.isFI())
return true;
return false;
}

/// Look up the compare/test opcode that corresponds to the ALU opcode \p Opc.
///
/// SUB maps to CMP, ADD to CMN, AND to TST and EOR to TEQ, each keeping the
/// operand form of the source opcode (the two Thumb1 immediate SUB forms both
/// map to tCMPi8). Returns std::nullopt when \p Opc has no entry.
static std::optional<unsigned> mapToCmpCmnTstTeqOpcode(unsigned Opc) {
  struct OpcodePair {
    unsigned Alu; // Opcode that writes a register result.
    unsigned Cmp; // Replacement opcode without a register result.
  };

  static constexpr OpcodePair Table[] = {
      // ARM encodings
      {ARM::SUBri, ARM::CMPri},
      {ARM::SUBrr, ARM::CMPrr},
      {ARM::SUBrsi, ARM::CMPrsi},
      {ARM::SUBrsr, ARM::CMPrsr},

      {ARM::ADDri, ARM::CMNri},
      {ARM::ADDrr, ARM::CMNzrr},
      {ARM::ADDrsi, ARM::CMNzrsi},
      {ARM::ADDrsr, ARM::CMNzrsr},

      {ARM::ANDri, ARM::TSTri},
      {ARM::ANDrr, ARM::TSTrr},
      {ARM::ANDrsi, ARM::TSTrsi},
      {ARM::ANDrsr, ARM::TSTrsr},

      {ARM::EORri, ARM::TEQri},
      {ARM::EORrr, ARM::TEQrr},
      {ARM::EORrsi, ARM::TEQrsi},
      {ARM::EORrsr, ARM::TEQrsr},

      // Thumb2 encodings
      {ARM::t2SUBri, ARM::t2CMPri},
      {ARM::t2SUBrr, ARM::t2CMPrr},
      {ARM::t2SUBrs, ARM::t2CMPrs},

      {ARM::t2ADDri, ARM::t2CMNri},
      {ARM::t2ADDrr, ARM::t2CMNzrr},
      {ARM::t2ADDrs, ARM::t2CMNzrs},

      {ARM::t2ANDri, ARM::t2TSTri},
      {ARM::t2ANDrr, ARM::t2TSTrr},
      {ARM::t2ANDrs, ARM::t2TSTrs},

      {ARM::t2EORri, ARM::t2TEQri},
      {ARM::t2EORrr, ARM::t2TEQrr},
      {ARM::t2EORrs, ARM::t2TEQrs},

      // Thumb1 limited support.
      // NOTE(review): the tSUBS* opcodes look like ISel-only pseudos; confirm
      // they can still be present where this pass runs.
      {ARM::tSUBSrr, ARM::tCMPr},
      {ARM::tSUBSi3, ARM::tCMPi8},
      {ARM::tSUBSi8, ARM::tCMPi8},
      {ARM::tAND, ARM::tTST},
  };

  for (const OpcodePair &Entry : Table)
    if (Entry.Alu == Opc)
      return Entry.Cmp;
  return std::nullopt;
}

/// Append to \p Dst the operands of \p Src that the replacement instruction
/// still needs.
///
/// The explicit definitions of \p Src (the first getNumDefs() operands) are
/// dropped. The operands that follow are copied up to, but not including, the
/// first predicate operand. The predicate operand and the operand directly
/// after it are then appended; anything that follows them in \p Src is not
/// copied.
///
/// If \p Src has no predicate operand, only its remaining explicit operands
/// are copied. Implicit operands are deliberately left out: the caller creates
/// \p Dst with BuildMI from the new opcode's descriptor, so copying the
/// implicit operands of \p Src as well would duplicate them.
static void copyNonDefNonPredOperands(MachineInstr &Dst,
                                      const MachineInstr &Src) {
  const int PIdx = Src.findFirstPredOperandIdx();
  const unsigned Start = Src.getDesc().getNumDefs();
  const unsigned End = (PIdx == -1) ? Src.getNumExplicitOperands()
                                    : static_cast<unsigned>(PIdx);

  for (unsigned I = Start; I < End; ++I)
    Dst.addOperand(Src.getOperand(I));

  if (PIdx != -1) {
    // The predicate is copied as a pair of consecutive operands, so the
    // second one must exist.
    assert(static_cast<unsigned>(PIdx) + 1 < Src.getNumOperands() &&
           "Predicate operand is not followed by a second operand!");
    Dst.addOperand(Src.getOperand(PIdx));
    Dst.addOperand(Src.getOperand(PIdx + 1));
  }
}

/// Replace every eligible instruction of \p MBB with its compare/test form.
///
/// An instruction is eligible when all of the following hold:
///  - its opcode has an entry in mapToCmpCmnTstTeqOpcode();
///  - ARMBaseInstrInfo::isCPSRDefined() reports that it sets the flags;
///  - none of its use operands is a frame index;
///  - one of its explicit register definitions is a virtual register that is
///    either marked dead or has no non-debug use, and that definition is not
///    tied to a use operand.
///
/// The replacement is inserted before the original instruction, receives the
/// original's non-def operands, MI flags and memory operands, and the original
/// is then erased. Sets Changed and bumps NumDeadDefsReplaced per replacement.
void ARMDeadRegisterDefinitions::processMachineBasicBlock(
    MachineBasicBlock &MBB) {
  // Early-increment range: the iterator is advanced before the loop body runs,
  // so erasing the current instruction inside the loop is safe.
  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
    // The opcode lookup does not depend on any operand, so do it once per
    // instruction and use it as the first (and cheapest) filter.
    const std::optional<unsigned> NewOpc =
        mapToCmpCmnTstTeqOpcode(MI.getOpcode());
    if (!NewOpc)
      continue;

    // Only consider instructions that set CPSR (flag-setting variants); the
    // replacement exists solely to produce those flags.
    if (!ARMBaseInstrInfo::isCPSRDefined(MI))
      continue;

    // NOTE(review): presumably skipped because a frame-index operand may be
    // expanded later and still need the destination register -- confirm.
    if (usesFrameIndex(MI))
      continue;

    const unsigned NumDefs = MI.getDesc().getNumDefs();
    for (unsigned I = 0; I != NumDefs; ++I) {
      const MachineOperand &MO = MI.getOperand(I);
      if (!MO.isReg() || !MO.isDef())
        continue;
      const Register Reg = MO.getReg();
      if (!Reg.isVirtual() || (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
        continue;
      assert(!MO.isImplicit() && "Unexpected implicit def!");
      // A definition tied to a use cannot simply be dropped.
      if (MI.isRegTiedToUseOperand(I))
        continue;

      MachineInstrBuilder MIB =
          BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(*NewOpc));
      copyNonDefNonPredOperands(*MIB, MI);
      MIB.setMIFlags(MI.getFlags());
      MIB.cloneMemRefs(MI);

      // Only non-debug uses were checked above, so DBG_VALUEs may still refer
      // to Reg. Mark them undef before its definition disappears.
      MRI->markUsesInDebugValueAsUndef(Reg);

      MI.eraseFromParent();
      ++NumDeadDefsReplaced;
      Changed = true;
      break; // MI is gone; move on to the next instruction.
    }
  }
}

/// Walk every basic block of \p MF and replace instructions that have a dead
/// register definition with a compare instruction when possible.
///
/// \returns true if at least one instruction was replaced.
bool ARMDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  LLVM_DEBUG(dbgs() << "***** ARMDeadRegisterDefinitions *****\n");

  // Cache the per-function objects the block walker relies on.
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();

  Changed = false;
  for (MachineBasicBlock &Block : MF)
    processMachineBasicBlock(Block);
  return Changed;
}

/// Factory for the pass: returns a newly allocated ARMDeadRegisterDefinitions
/// instance as a raw pointer.
FunctionPass *llvm::createARMDeadRegisterDefinitions() {
return new ARMDeadRegisterDefinitions();
}
12 changes: 12 additions & 0 deletions llvm/lib/Target/ARM/ARMTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("arm-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));

// Hidden command-line switch "arm-enable-dead-defs", initialised to true.
// ARMPassConfig::addPreRegAlloc() consults it before adding the pass that
// replaces dead-destination flag-setting ALU instructions with compares/tests.
static cl::opt<bool> EnableARMDeadRegisterElimination(
"arm-enable-dead-defs", cl::Hidden,
cl::desc("Enable the pass that replaces"
" dead-dest flag-setting ALU"
" instructions with compares/tests"
" pre-RA"),
cl::init(true));

namespace llvm {
void initializeARMExecutionDomainFixPass(PassRegistry&);
}
Expand Down Expand Up @@ -510,6 +518,10 @@ bool ARMPassConfig::addGlobalInstructionSelect() {

void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOptLevel::None) {
// Replace dead-dest flag-setting ALU with compares/tests pre-RA.
if (EnableARMDeadRegisterElimination)
addPass(createARMDeadRegisterDefinitions());

if (getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(&MachinePipelinerID);

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/ARM/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_llvm_target(ARMCodeGen
ARMCallLowering.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
ARMDeadRegisterDefinitionsPass.cpp
ARMExpandPseudoInsts.cpp
ARMFastISel.cpp
ARMFixCortexA57AES1742098Pass.cpp
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/ARM/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: ARM Convert dead defs to compares
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/ARM/addsubcarry-promotion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
; ARM-LABEL: fn1:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: rsb r2, r2, #0
; ARM-NEXT: adds r0, r1, r0
; ARM-NEXT: cmn r1, r0
; ARM-NEXT: movw r1, #65535
; ARM-NEXT: sxth r2, r2
; ARM-NEXT: adc r0, r2, #1
Expand Down Expand Up @@ -54,7 +54,7 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
; THUMB-LABEL: fn1:
; THUMB: @ %bb.0: @ %entry
; THUMB-NEXT: rsbs r2, r2, #0
; THUMB-NEXT: adds r0, r0, r1
; THUMB-NEXT: cmn r1, r0
; THUMB-NEXT: sxth r2, r2
; THUMB-NEXT: adc r0, r2, #1
; THUMB-NEXT: lsls r0, r0, #16
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/ARM/addsubo-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: vmov r6, r7, d19
; CHECK-NEXT: vmov lr, r12, d16
; CHECK-NEXT: vmov r4, r5, d17
; CHECK-NEXT: subs.w r3, lr, r3
; CHECK-NEXT: cmp lr, r3
; CHECK-NEXT: sbcs.w r2, r12, r2
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r2, #-1
; CHECK-NEXT: subs r3, r4, r6
; CHECK-NEXT: cmp r4, r6
; CHECK-NEXT: sbcs.w r3, r5, r7
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r1, #1
Expand Down Expand Up @@ -57,15 +57,15 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: vmov r4, r5, d19
; CHECK-NEXT: vmov r3, r2, d16
; CHECK-NEXT: vmov r6, r7, d17
; CHECK-NEXT: subs.w r3, lr, r3
; CHECK-NEXT: cmp lr, r3
; CHECK-NEXT: sbcs.w r2, r12, r2
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r2, #-1
; CHECK-NEXT: subs r3, r4, r6
; CHECK-NEXT: cmp r4, r6
; CHECK-NEXT: sbcs.w r3, r5, r7
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r1, #1
Expand Down
Loading