Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/WebAssembly/WebAssembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ FunctionPass *createWebAssemblyReplacePhysRegs();
FunctionPass *createWebAssemblyNullifyDebugValueLists();
FunctionPass *createWebAssemblyOptimizeLiveIntervals();
FunctionPass *createWebAssemblyMemIntrinsicResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegStackify(CodeGenOptLevel OptLevel);
FunctionPass *createWebAssemblyRegColoring();
FunctionPass *createWebAssemblyFixBrTableDefaults();
FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
Expand Down
138 changes: 89 additions & 49 deletions llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,18 @@ using namespace llvm;

namespace {
class WebAssemblyRegStackify final : public MachineFunctionPass {
bool Optimize;

StringRef getPassName() const override {
return "WebAssembly Register Stackify";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTreeWrapperPass>();
AU.addRequired<LiveIntervalsWrapperPass>();
if (Optimize) {
AU.addRequired<LiveIntervalsWrapperPass>();
AU.addRequired<MachineDominatorTreeWrapperPass>();
}
AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
AU.addPreserved<SlotIndexesWrapperPass>();
AU.addPreserved<LiveIntervalsWrapperPass>();
Expand All @@ -61,7 +65,9 @@ class WebAssemblyRegStackify final : public MachineFunctionPass {

public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
WebAssemblyRegStackify(CodeGenOptLevel OptLevel)
: MachineFunctionPass(ID), Optimize(OptLevel != CodeGenOptLevel::None) {}
WebAssemblyRegStackify() : WebAssemblyRegStackify(CodeGenOptLevel::Default) {}
};
} // end anonymous namespace

Expand All @@ -70,8 +76,8 @@ INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE,
"Reorder instructions to use the WebAssembly value stack",
false, false)

FunctionPass *llvm::createWebAssemblyRegStackify() {
return new WebAssemblyRegStackify();
FunctionPass *llvm::createWebAssemblyRegStackify(CodeGenOptLevel OptLevel) {
return new WebAssemblyRegStackify(OptLevel);
}

// Decorate the given instruction with implicit operands that enforce the
Expand All @@ -96,8 +102,7 @@ static void imposeStackOrdering(MachineInstr *MI) {
static void convertImplicitDefToConstZero(MachineInstr *MI,
MachineRegisterInfo &MRI,
const TargetInstrInfo *TII,
MachineFunction &MF,
LiveIntervals &LIS) {
MachineFunction &MF) {
assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);

const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
Expand Down Expand Up @@ -262,36 +267,53 @@ static bool shouldRematerialize(const MachineInstr &Def,
// LiveIntervals to handle complex cases.
static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
const MachineRegisterInfo &MRI,
const LiveIntervals &LIS) {
const LiveIntervals *LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
return Def;

// MRI doesn't know what the Def is. Try asking LIS.
if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
LIS.getInstructionIndex(*Insert)))
return LIS.getInstructionFromIndex(ValNo->def);
if (LIS != nullptr) {
SlotIndex InstIndex = LIS->getInstructionIndex(*Insert);
if (const VNInfo *ValNo = LIS->getInterval(Reg).getVNInfoBefore(InstIndex))
return LIS->getInstructionFromIndex(ValNo->def);
}

return nullptr;
}

// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneNonDBGUse that uses
// LiveIntervals to handle complex cases.
static bool hasOneNonDBGUse(unsigned Reg, MachineInstr *Def,
MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
LiveIntervals &LIS) {
// LiveIntervals to handle complex cases in optimized code.
static bool hasSingleUse(unsigned Reg, MachineRegisterInfo &MRI,
WebAssemblyFunctionInfo &MFI, bool Optimize,
MachineInstr *Def, LiveIntervals *LIS) {
if (!Optimize) {
// Using "hasOneUse" instead of "hasOneNonDBGUse" here because we don't
// want to stackify DBG_VALUE operands - WASM stack locations are less
// useful and less widely supported than WASM local locations.
if (!MRI.hasOneUse(Reg))
return false;
// The frame base always has an implicit DBG use as DW_AT_frame_base.
if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg)
return false;
return true;
}

// Most registers are in SSA form here so we try a quick MRI query first.
if (MRI.hasOneNonDBGUse(Reg))
return true;

if (LIS == nullptr)
return false;

bool HasOne = false;
const LiveInterval &LI = LIS.getInterval(Reg);
const LiveInterval &LI = LIS->getInterval(Reg);
const VNInfo *DefVNI =
LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
LI.getVNInfoAt(LIS->getInstructionIndex(*Def).getRegSlot());
assert(DefVNI);
for (auto &I : MRI.use_nodbg_operands(Reg)) {
const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
const auto &Result = LI.Query(LIS->getInstructionIndex(*I.getParent()));
if (Result.valueIn() == DefVNI) {
if (!Result.isKill())
return false;
Expand All @@ -311,7 +333,7 @@ static bool hasOneNonDBGUse(unsigned Reg, MachineInstr *Def,
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
const MachineInstr *Insert,
const WebAssemblyFunctionInfo &MFI,
const MachineRegisterInfo &MRI) {
const MachineRegisterInfo &MRI, bool Optimize) {
const MachineInstr *DefI = Def->getParent();
const MachineInstr *UseI = Use->getParent();
assert(DefI->getParent() == Insert->getParent());
Expand Down Expand Up @@ -357,6 +379,12 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
if (NextI == Insert)
return true;

// When not optimizing, we only handle the trivial case above
// to guarantee no impact to debugging and to avoid spending
// compile time.
if (!Optimize)
return false;

// 'catch' and 'catch_all' should be the first instruction of a BB and cannot
// move.
if (WebAssembly::isCatch(DefI->getOpcode()))
Expand Down Expand Up @@ -520,14 +548,15 @@ static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
/// dependencies; move the def down and nest it with the current instruction.
static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
MachineInstr *Def, MachineBasicBlock &MBB,
MachineInstr *Insert, LiveIntervals &LIS,
MachineInstr *Insert, LiveIntervals *LIS,
WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI) {
LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());

WebAssemblyDebugValueManager DefDIs(Def);
DefDIs.sink(Insert);
LIS.handleMove(*Def);
if (LIS != nullptr)
LIS->handleMove(*Def);

if (MRI.hasOneDef(Reg) && MRI.hasOneNonDBGUse(Reg)) {
// No one else is using this register for anything so we can just stackify
Expand All @@ -540,17 +569,18 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
Op.setReg(NewReg);
DefDIs.updateReg(NewReg);

// Tell LiveIntervals about the new register.
LIS.createAndComputeVirtRegInterval(NewReg);
if (LIS != nullptr) {
// Tell LiveIntervals about the new register.
LIS->createAndComputeVirtRegInterval(NewReg);

// Tell LiveIntervals about the changes to the old register.
LiveInterval &LI = LIS.getInterval(Reg);
LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
/*RemoveDeadValNo=*/true);
// Tell LiveIntervals about the changes to the old register.
LiveInterval &LI = LIS->getInterval(Reg);
LI.removeSegment(LIS->getInstructionIndex(*Def).getRegSlot(),
LIS->getInstructionIndex(*Op.getParent()).getRegSlot(),
/*RemoveDeadValNo=*/true);
}

MFI.stackifyVReg(MRI, NewReg);

LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
}

Expand All @@ -567,11 +597,12 @@ static MachineInstr *getPrevNonDebugInst(MachineInstr *MI) {

/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction.
static MachineInstr *rematerializeCheapDef(
unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
static MachineInstr *
rematerializeCheapDef(unsigned Reg, MachineOperand &Op, MachineInstr &Def,
MachineBasicBlock::instr_iterator Insert,
LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI,
const WebAssemblyInstrInfo *TII) {
LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());

Expand Down Expand Up @@ -811,9 +842,12 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
MachineDominatorTree *MDT = nullptr;
LiveIntervals *LIS = nullptr;
if (Optimize) {
MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
}

// Walk the instructions from the bottom up. Currently we don't look past
// block boundaries, and the blocks aren't ordered so the block visitation
Expand Down Expand Up @@ -876,23 +910,28 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// supports intra-block moves) and it's MachineSink's job to catch all
// the sinking opportunities anyway.
bool SameBlock = DefI->getParent() == &MBB;
bool CanMove = SameBlock && isSafeToMove(Def, &Use, Insert, MFI, MRI) &&
bool CanMove = SameBlock &&
isSafeToMove(Def, &Use, Insert, MFI, MRI, Optimize) &&
!TreeWalker.isOnStack(Reg);
if (CanMove && hasOneNonDBGUse(Reg, DefI, MRI, MDT, LIS)) {
if (CanMove && hasSingleUse(Reg, MRI, MFI, Optimize, DefI, LIS)) {
Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI);

// If we are removing the frame base reg completely, remove the debug
// info as well.
// TODO: Encode this properly as a stackified value.
if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg)
if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) {
assert(
Optimize &&
"Stackifying away frame base in unoptimized code not expected");
MFI.clearFrameBaseVreg();
} else if (shouldRematerialize(*DefI, TII)) {
Insert =
rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(),
LIS, MFI, MRI, TII, TRI);
} else if (CanMove && oneUseDominatesOtherUses(Reg, Use, MBB, MRI, MDT,
LIS, MFI)) {
Insert = moveAndTeeForMultiUse(Reg, Use, DefI, MBB, Insert, LIS, MFI,
}
} else if (Optimize && shouldRematerialize(*DefI, TII)) {
Insert = rematerializeCheapDef(Reg, Use, *DefI, Insert->getIterator(),
*LIS, MFI, MRI, TII);
} else if (Optimize && CanMove &&
oneUseDominatesOtherUses(Reg, Use, MBB, MRI, *MDT, *LIS,
MFI)) {
Insert = moveAndTeeForMultiUse(Reg, Use, DefI, MBB, Insert, *LIS, MFI,
MRI, TII);
} else {
// We failed to stackify the operand. If the problem was ordering
Expand All @@ -915,7 +954,8 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
Register DefReg = SubsequentDef->getReg();
Register UseReg = SubsequentUse->getReg();
// TODO: This single-use restriction could be relaxed by using tees
if (DefReg != UseReg || !MRI.hasOneNonDBGUse(DefReg))
if (DefReg != UseReg ||
!hasSingleUse(DefReg, MRI, MFI, Optimize, nullptr, nullptr))
break;
MFI.stackifyVReg(MRI, DefReg);
++SubsequentDef;
Expand All @@ -926,7 +966,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// to a constant 0 so that the def is explicit, and the push/pop
// correspondence is maintained.
if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
convertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
convertImplicitDefToConstZero(Insert, MRI, TII, MF);

// We stackified an operand. Add the defining instruction's operands to
// the worklist stack now to continue to build an ever deeper tree.
Expand Down
14 changes: 8 additions & 6 deletions llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,14 +603,16 @@ void WebAssemblyPassConfig::addPreEmitPass() {

// Prepare memory intrinsic calls for register stackifying.
addPass(createWebAssemblyMemIntrinsicResults());
}

// Mark registers as representing wasm's value stack. This is a key
// code-compression technique in WebAssembly. We run this pass (and
// MemIntrinsicResults above) very late, so that it sees as much code as
// possible, including code emitted by PEI and expanded by late tail
// duplication.
addPass(createWebAssemblyRegStackify());
// Mark registers as representing wasm's value stack. This is a key
// code-compression technique in WebAssembly. We run this pass (and
// MemIntrinsicResults above) very late, so that it sees as much code as
// possible, including code emitted by PEI and expanded by late tail
// duplication.
addPass(createWebAssemblyRegStackify(getOptLevel()));

if (getOptLevel() != CodeGenOptLevel::None) {
// Run the register coloring pass to reduce the total number of registers.
// This runs after stackification so that it doesn't consider registers
// that become stackified.
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/WebAssembly/PR40172.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ target triple = "wasm32-unknown-unknown"

; CHECK: i32.sub $[[BASE:[0-9]+]]=,
; CHECK: local.copy $[[ARG:[0-9]+]]=, $0{{$}}
; CHECK: i32.const $[[A0:[0-9]+]]=, 1{{$}}
; CHECK: i32.and $[[A1:[0-9]+]]=, $[[ARG]], $[[A0]]{{$}}
; CHECK: i32.store8 8($[[BASE]]), $[[A1]]{{$}}
; CHECK: i32.const $push[[A0:[0-9]+]]=, 1{{$}}
; CHECK: i32.and $push[[A1:[0-9]+]]=, $[[ARG]], $pop[[A0]]{{$}}
; CHECK: i32.store8 8($[[BASE]]), $pop[[A1]]{{$}}

define void @test(i8 %byte) {
%t = alloca { i8, i8 }, align 8
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/WebAssembly/PR41841.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ declare void @foo(i128)

; CHECK-LABEL: test_zext:
; CHECK-NEXT: .functype test_zext (i32) -> (){{$}}
; CHECK-NEXT: i64.extend_i32_u $[[TMP3:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: i64.const $[[TMP4:[0-9]+]]=, 1{{$}}
; CHECK-NEXT: i64.and $[[TMP1:[0-9]+]]=, $[[TMP3]], $[[TMP4]]{{$}}
; CHECK-NEXT: i64.extend_i32_u $push[[TMP3:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: i64.const $push[[TMP4:[0-9]+]]=, 1{{$}}
; CHECK-NEXT: i64.and $[[TMP1:[0-9]+]]=, $pop[[TMP3]], $pop[[TMP4]]{{$}}
; CHECK-NEXT: i64.const $[[TMP2:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: call foo, $[[TMP1]], $[[TMP2]]{{$}}
; CHECK-NEXT: return{{$}}
Expand All @@ -23,11 +23,11 @@ next: ; preds = %start

; CHECK-LABEL: test_sext:
; CHECK-NEXT:.functype test_sext (i32) -> (){{$}}
; CHECK-NEXT: i64.extend_i32_u $[[TMP3:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: i64.const $[[TMP4:[0-9]+]]=, 1{{$}}
; CHECK-NEXT: i64.and $[[TMP5:[0-9]+]]=, $[[TMP3]], $[[TMP4]]{{$}}
; CHECK-NEXT: i64.const $[[TMP6:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: i64.sub $[[TMP1:[0-9]+]]=, $[[TMP6]], $[[TMP5]]{{$}}
; CHECK-NEXT: i64.extend_i32_u $push[[TMP3:[0-9]+]]=, $0{{$}}
; CHECK-NEXT: i64.const $push[[TMP4:[0-9]+]]=, 1{{$}}
; CHECK-NEXT: i64.and $[[TMP5:[0-9]+]]=, $pop[[TMP3]], $pop[[TMP4]]{{$}}
; CHECK-NEXT: i64.const $push[[TMP6:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: i64.sub $[[TMP1:[0-9]+]]=, $pop[[TMP6]], $[[TMP5]]{{$}}
; CHECK-NEXT: local.copy $[[TMP2:[0-9]+]]=, $[[TMP1]]{{$}}
; CHECK-NEXT: call foo, $[[TMP1]], $[[TMP2]]{{$}}
; CHECK-NEXT: return{{$}}
Expand Down
Loading
Loading