Skip to content

Commit 41693a3

Browse files
PavelKopyl authored and hedgar2017 committed
[EVM] Support data sections in the code
This is achieved by emitting global variables with constant initializers into the code section. Removed unused patterns from TableGen.
1 parent e6dd841 commit 41693a3

File tree

11 files changed

+302
-49
lines changed

11 files changed

+302
-49
lines changed

llvm/lib/Target/EVM/EVMAsmPrinter.cpp

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14+
#include "EVM.h"
1415
#include "EVMMCInstLower.h"
1516
#include "EVMMachineFunctionInfo.h"
1617
#include "EVMTargetMachine.h"
@@ -22,6 +23,8 @@
2223
#include "llvm/ADT/StringSet.h"
2324
#include "llvm/BinaryFormat/ELF.h"
2425
#include "llvm/CodeGen/AsmPrinter.h"
26+
#include "llvm/IR/GlobalVariable.h"
27+
#include "llvm/IR/Module.h"
2528
#include "llvm/MC/MCAsmInfo.h"
2629
#include "llvm/MC/MCContext.h"
2730
#include "llvm/MC/MCInst.h"
@@ -49,6 +52,16 @@ class EVMAsmPrinter : public AsmPrinter {
4952
StringSet<> WideRelocSymbolsSet;
5053
StringMap<unsigned> ImmutablesMap;
5154

55+
// Contains constant global variable initializers in address space AS_CODE,
56+
// which are concatenated into a single block. Duplicate initializers and
57+
// those that are substrings of others are removed. This "data section"
58+
// is emitted at the end of the .text section.
59+
std::string DataSectionBuffer;
60+
MCSymbol *DataSectionSymbol = nullptr;
61+
// Maps each global variable symbol to the offset within the data section
62+
// where its corresponding initializer is located.
63+
DenseMap<const MCSymbol *, uint64_t> GlobSymbolToOffsetMap;
64+
5265
// True if there is a function that pushes deploy address.
5366
bool ModuleHasPushDeployAddress = false;
5467

@@ -68,14 +81,20 @@ class EVMAsmPrinter : public AsmPrinter {
6881

6982
void emitEndOfAsmFile(Module &) override;
7083

84+
void emitStartOfAsmFile(Module &) override;
85+
7186
void emitFunctionBodyStart() override;
87+
7288
void emitFunctionBodyEnd() override;
7389

90+
void emitGlobalVariable(const GlobalVariable *GV) override;
91+
7492
private:
7593
void emitAssemblySymbol(const MachineInstr *MI);
7694
void emitWideRelocatableSymbol(const MachineInstr *MI);
7795
void emitLoadImmutableLabel(const MachineInstr *MI);
7896
void emitJumpDest();
97+
void createDataSectionBuffer(const Module &M);
7998
};
8099
} // end of anonymous namespace
81100

@@ -221,7 +240,7 @@ void EVMAsmPrinter::emitInstruction(const MachineInstr *MI) {
221240
}
222241

223242
MCInst TmpInst;
224-
MCInstLowering.Lower(MI, TmpInst);
243+
MCInstLowering.Lower(MI, TmpInst, GlobSymbolToOffsetMap, DataSectionSymbol);
225244
EmitToStreamer(*OutStreamer, TmpInst);
226245
}
227246

@@ -328,8 +347,49 @@ void EVMAsmPrinter::emitWideRelocatableSymbol(const MachineInstr *MI) {
328347
OutStreamer->switchSection(CurrentSection);
329348
}
330349

331-
void EVMAsmPrinter::emitEndOfAsmFile(Module &) {
350+
// Builds DataSectionBuffer from the globals of module M and records, for the
// symbol of each collected global, the offset of its initializer inside that
// buffer (stored in GlobSymbolToOffsetMap).
// Only globals in address space EVMAS::AS_CODE that have an initializer of
// kind ConstantDataSequential are collected; all other globals are skipped.
void EVMAsmPrinter::createDataSectionBuffer(const Module &M) {
351+
SmallVector<std::pair<const GlobalVariable *, StringRef>, 16> Globals;
352+
for (const GlobalVariable &GV : M.globals()) {
353+
if (GV.getAddressSpace() != EVMAS::AS_CODE || !GV.hasInitializer())
354+
continue;
355+
356+
const auto *CV = dyn_cast<ConstantDataSequential>(GV.getInitializer());
357+
if (!CV)
358+
continue;
359+
360+
// Pair the global with the StringRef returned by getRawDataValues().
Globals.emplace_back(&GV, CV->getRawDataValues());
361+
}
362+
// Sort global variables in descending order based on the size of their
363+
// initializers.
// Longer initializers are appended first, so the containment check below
// can detect shorter initializers that are already covered by them.
364+
stable_sort(Globals, [](const auto &A, const auto &B) {
365+
return A.second.size() > B.second.size();
366+
});
367+
368+
// Construct the data section by concatenating unique initializers,
369+
// eliminating duplicates, and excluding any initializer that is a
370+
// substring of another.
371+
// NOTE: Rather than simply concatenating unique strings, we could attempt
372+
// to compute the Shortest Common Superstring by allowing partial overlaps
373+
// between strings. Although this is an NP-hard problem, we could explore
374+
// an approximate greedy solution. Consider this approach if there are
375+
// real programs that could benefit from the optimization.
376+
DataSectionBuffer.clear();
377+
raw_string_ostream Stream(DataSectionBuffer);
378+
for (const auto &[_, Init] : Globals)
379+
// The check runs against everything appended so far, so an initializer is
// also skipped when it matches bytes spanning two adjacent entries.
// NOTE(review): DataSectionBuffer is read here while Stream is still live;
// this assumes raw_string_ostream writes through to the string immediately
// -- confirm for the LLVM version in use.
if (!StringRef(DataSectionBuffer).contains(Init))
380+
Stream << Init;
381+
382+
// Compute offsets of each global initializer in the data section.
// find() yields the first occurrence, so duplicate and contained
// initializers map into bytes that were appended for an earlier entry.
383+
StringRef DataView(DataSectionBuffer);
384+
for (const auto &[GV, Init] : Globals) {
385+
size_t Offset = DataView.find(Init);
386+
// Every initializer was either appended above or already contained in the
// buffer, so the lookup is expected to succeed.
assert(Offset != StringRef::npos &&
387+
"Initializer not found in data section");
388+
GlobSymbolToOffsetMap[getSymbol(GV)] = Offset;
389+
}
390+
}
332391

392+
void EVMAsmPrinter::emitEndOfAsmFile(Module &) {
333393
// The deploy and runtime code must end with INVALID instruction to
334394
// comply with 'solc'. To ensure this, we append an INVALID
335395
// instruction at the end of the .text section.
@@ -346,8 +406,14 @@ void EVMAsmPrinter::emitEndOfAsmFile(Module &) {
346406
TM.getTargetFeatureString()));
347407

348408
OutStreamer->emitInstruction(MCI, *STI);
409+
410+
// Emit constants to the code.
411+
OutStreamer->emitLabel(DataSectionSymbol);
412+
OutStreamer->emitBinaryData(DataSectionBuffer);
413+
349414
OutStreamer->popSection();
350415

416+
GlobSymbolToOffsetMap.clear();
351417
WideRelocSymbolsSet.clear();
352418
ImmutablesMap.clear();
353419
ModuleHasPushDeployAddress = false;
@@ -360,6 +426,20 @@ void EVMAsmPrinter::emitJumpDest() {
360426
EmitToStreamer(*OutStreamer, JumpDest);
361427
}
362428

429+
// Override of the AsmPrinter start-of-file hook. Populates DataSectionBuffer
// and GlobSymbolToOffsetMap for module M, then obtains the
// "code_data_section" symbol. emitEndOfAsmFile() emits that symbol as the
// label directly in front of the buffered data.
void EVMAsmPrinter::emitStartOfAsmFile(Module &M) {
430+
createDataSectionBuffer(M);
431+
DataSectionSymbol = OutContext.getOrCreateSymbol("code_data_section");
432+
}
433+
434+
// Emits global variable GV through the base AsmPrinter implementation,
// except for globals that are placed into the code data section instead.
void EVMAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
435+
// Globals in EVMAS::AS_CODE with a ConstantDataSequential initializer are
// collected into DataSectionBuffer by createDataSectionBuffer(), so the
// default emission is skipped for them. The condition mirrors the filter
// used there.
436+
if (GV->getAddressSpace() == EVMAS::AS_CODE && GV->hasInitializer())
437+
if (isa<ConstantDataSequential>(GV->getInitializer()))
438+
return;
439+
440+
AsmPrinter::emitGlobalVariable(GV);
441+
}
442+
363443
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMAsmPrinter() {
364444
const RegisterAsmPrinter<EVMAsmPrinter> X(getTheEVMTarget());
365445
}

llvm/lib/Target/EVM/EVMISelLowering.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,8 +473,16 @@ SDValue EVMTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
473473
MemOpISD = EVMISD::MEMCPY_CODE;
474474
break;
475475
}
476+
477+
SDValue SrcOp = Op.getOperand(3);
478+
// Support for copying bytes from a data section residing in code memory.
479+
if (MemOpISD == EVMISD::MEMCPY_CODE)
480+
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(SrcOp))
481+
SrcOp = DAG.getMCSymbol(DAG.getTarget().getSymbol(GA->getGlobal()),
482+
MVT::i256);
483+
476484
return DAG.getNode(MemOpISD, DL, MVT::Other, Op.getOperand(0),
477-
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
485+
Op.getOperand(2), SrcOp, Op.getOperand(4));
478486
}
479487

480488
//===----------------------------------------------------------------------===//

llvm/lib/Target/EVM/EVMInstrInfo.td

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@ def SDT_EVMRet
2929
def SDT_EVMSignextend
3030
: SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<1, i256>]>;
3131

32-
def SDT_EVMTargetAddrWrapper
33-
: SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
34-
3532
def SDT_EVMMemcpy
3633
: SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
3734

@@ -57,9 +54,6 @@ def EVMret
5754
def EVMSignextend
5855
: SDNode<"EVMISD::SIGNEXTEND", SDT_EVMSignextend>;
5956

60-
def EVMTargetAddrWrapper
61-
: SDNode<"EVMISD::TARGET_ADDR_WRAPPER", SDT_EVMTargetAddrWrapper>;
62-
6357
def EVMMemcpy_call_data
6458
: SDNode<"EVMISD::MEMCPY_CALL_DATA", SDT_EVMMemcpy,
6559
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
@@ -211,11 +205,6 @@ def CONST_I256
211205
let isAsCheapAsAMove = 1 in def COPY_I256
212206
: NRI<(outs GPR:$res), (ins GPR:$src), [], "COPY_I256 $res, $src">;
213207

214-
def : Pat<(i256 (EVMTargetAddrWrapper tglobaladdr:$addr)),
215-
(CONST_I256 tglobaladdr:$addr)>;
216-
def : Pat<(i256 (EVMTargetAddrWrapper texternalsym:$addr)),
217-
(CONST_I256 texternalsym:$addr)>;
218-
219208
let Uses = [SP], isCall = 1 in {
220209

221210
// CALL should take both variadic arguments and produce variadic results, but

llvm/lib/Target/EVM/EVMMCInstLower.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,10 @@ MCOperand EVMMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
8787
return MCOperand::createExpr(Expr);
8888
}
8989

90-
void EVMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) {
90+
void EVMMCInstLower::Lower(
91+
const MachineInstr *MI, MCInst &OutMI,
92+
const DenseMap<const MCSymbol *, uint64_t> &GlobSymbolToOffsetMap,
93+
const MCSymbol *DataSectionSymbol) {
9194
OutMI.setOpcode(MI->getOpcode());
9295
const MCInstrDesc &Desc = MI->getDesc();
9396
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
@@ -125,16 +128,25 @@ void EVMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) {
125128
}
126129
} break;
127130
case MachineOperand::MO_MCSymbol: {
128-
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VariantKind::VK_None;
129131
#ifndef NDEBUG
130132
unsigned Opc = MI->getOpcode();
131133
// We handle the linkage-related instructions in the EVMAsmPrinter.
132134
assert(Opc != EVM::DATASIZE_S && Opc != EVM::DATAOFFSET_S &&
133135
Opc != EVM::LINKERSYMBOL_S && Opc != EVM::LOADIMMUTABLE_S);
134136
#endif // NDEBUG
135137

136-
MCOp = MCOperand::createExpr(
137-
MCSymbolRefExpr::create(MO.getMCSymbol(), Kind, Ctx));
138+
if (auto It = GlobSymbolToOffsetMap.find(MO.getMCSymbol());
139+
It != GlobSymbolToOffsetMap.end()) {
140+
const MCExpr *Expr = MCSymbolRefExpr::create(DataSectionSymbol, Ctx);
141+
if (It->second)
142+
Expr = MCBinaryExpr::createAdd(
143+
Expr, MCConstantExpr::create(It->second, Ctx), Ctx);
144+
145+
MCOp = MCOperand::createExpr(Expr);
146+
break;
147+
}
148+
MCOp =
149+
MCOperand::createExpr(MCSymbolRefExpr::create(MO.getMCSymbol(), Ctx));
138150
} break;
139151
case MachineOperand::MO_MachineBasicBlock:
140152
MCOp = MCOperand::createExpr(

llvm/lib/Target/EVM/EVMMCInstLower.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
namespace llvm {
1515
class AsmPrinter;
16+
class GlobalValue;
1617
class MCContext;
1718
class MCInst;
1819
class MCOperand;
@@ -40,7 +41,9 @@ class LLVM_LIBRARY_VISIBILITY EVMMCInstLower {
4041
const VRegRCMap &VRegMapping, const MachineRegisterInfo &MRI)
4142
: Ctx(Ctx), Printer(Printer), VRegMapping(VRegMapping), MRI(MRI) {}
4243

43-
void Lower(const MachineInstr *MI, MCInst &OutMI);
44+
void Lower(const MachineInstr *MI, MCInst &OutMI,
45+
const DenseMap<const MCSymbol *, uint64_t> &GlobSymbolToOffsetMap,
46+
const MCSymbol *DataSectionSymbol);
4447

4548
private:
4649
// Encodes the register class in the upper 4 bits along with the register

llvm/lib/Target/EVM/EVMStackModel.cpp

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,10 @@ StackSlot *EVMStackModel::getStackSlot(const MachineOperand &MO) const {
8787
Stack EVMStackModel::getSlotsForInstructionUses(const MachineInstr &MI) const {
8888
Stack In;
8989
for (const auto &MO : reverse(MI.explicit_uses())) {
90-
// All the non-register operands are handled in instruction specific
91-
// handlers.
92-
if (!MO.isReg())
93-
continue;
94-
95-
// SP is not used anyhow.
96-
if (MO.getReg() == EVM::SP)
97-
continue;
98-
99-
In.push_back(getStackSlot(MO));
90+
if (MO.isReg() && MO.getReg() != EVM::SP)
91+
In.push_back(getStackSlot(MO));
92+
else if (MO.isMCSymbol())
93+
In.push_back(getSymbolSlot(MO.getMCSymbol(), &MI));
10094
}
10195
return In;
10296
}

llvm/lib/Target/EVM/EVMStackifyCodeEmitter.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,13 @@ void EVMStackifyCodeEmitter::CodeEmitter::emitConstant(uint64_t Val) {
117117

118118
void EVMStackifyCodeEmitter::CodeEmitter::emitSymbol(const MachineInstr *MI,
119119
MCSymbol *Symbol) {
120-
assert(isLinkerPseudoMI(*MI) && "Unexpected symbol instruction");
120+
assert((isLinkerPseudoMI(*MI) || MI->getOpcode() == EVM::CODECOPY) &&
121+
"Unexpected symbol instruction");
122+
121123
StackHeight += 1;
122-
// This is codegen-only instruction, that will be converted into PUSH4.
123-
auto NewMI = BuildMI(*CurMBB, CurMBB->end(), MI->getDebugLoc(),
124-
TII->get(EVM::getStackOpcode(MI->getOpcode())))
124+
unsigned Opc = isLinkerPseudoMI(*MI) ? EVM::getStackOpcode(MI->getOpcode())
125+
: EVM::PUSH_LABEL;
126+
auto NewMI = BuildMI(*CurMBB, CurMBB->end(), MI->getDebugLoc(), TII->get(Opc))
125127
.addSym(Symbol);
126128
verify(NewMI);
127129
}

llvm/lib/Target/EVM/MCTargetDesc/EVMMCCodeEmitter.cpp

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -102,24 +102,39 @@ unsigned EVMMCCodeEmitter::getMachineOpValue(const MCInst &MI,
102102
const MCSubtargetInfo &STI) const {
103103
if (MO.isImm()) {
104104
Op = MO.getImm();
105-
} else if (MO.isExpr()) {
106-
auto Kind = MO.getExpr()->getKind();
107-
if (Kind == MCExpr::ExprKind::Target) {
108-
const auto *CImmExp = cast<EVMCImmMCExpr>(MO.getExpr());
109-
Op = APInt(Op.getBitWidth(), CImmExp->getString(), /*radix=*/16);
110-
} else if (Kind == MCExpr::ExprKind::SymbolRef) {
111-
const auto *RefExpr = cast<MCSymbolRefExpr>(MO.getExpr());
112-
MCSymbolRefExpr::VariantKind Kind = RefExpr->getKind();
113-
EVM::Fixups Fixup = getFixupForOpc(MI.getOpcode(), Kind);
114-
// The byte index of start of the relocation is always 1, as
115-
// we need to skip the instruction opcode which is always one byte.
116-
Fixups.push_back(
117-
MCFixup::create(1, MO.getExpr(), MCFixupKind(Fixup), MI.getLoc()));
118-
}
119-
} else {
120-
llvm_unreachable("Unexpected MC operand type");
105+
return 0;
121106
}
122107

108+
if (!MO.isExpr())
109+
llvm_unreachable("Unable to encode MCOperand");
110+
111+
MCExpr::ExprKind Kind = MO.getExpr()->getKind();
112+
if (Kind == MCExpr::ExprKind::Target) {
113+
const auto *CImmExp = cast<EVMCImmMCExpr>(MO.getExpr());
114+
Op = APInt(Op.getBitWidth(), CImmExp->getString(), /*radix=*/16);
115+
return 0;
116+
}
117+
118+
// We expect the relocatable immediate operand to be in the
119+
// form: @symbol + imm.
120+
const MCSymbolRefExpr *RefExpr = nullptr;
121+
if (Kind == MCExpr::ExprKind::Binary) {
122+
const auto *BE = cast<MCBinaryExpr>(MO.getExpr());
123+
RefExpr = dyn_cast<MCSymbolRefExpr>(
124+
isa<MCSymbolRefExpr>(BE->getLHS()) ? BE->getLHS() : BE->getRHS());
125+
} else if (Kind == MCExpr::ExprKind::SymbolRef) {
126+
RefExpr = cast<MCSymbolRefExpr>(MO.getExpr());
127+
}
128+
129+
if (!RefExpr)
130+
llvm_unreachable("Unexpected MCOperand type");
131+
132+
EVM::Fixups Fixup = getFixupForOpc(MI.getOpcode(), RefExpr->getKind());
133+
// The byte index of start of the relocation is always 1, as
134+
// we need to skip the instruction opcode which is always one byte.
135+
Fixups.push_back(
136+
MCFixup::create(1, MO.getExpr(), MCFixupKind(Fixup), MI.getLoc()));
137+
123138
return 0;
124139
}
125140

0 commit comments

Comments
 (0)