1111//
1212// ===----------------------------------------------------------------------===//
1313
14+ #include " EVM.h"
1415#include " EVMMCInstLower.h"
1516#include " EVMMachineFunctionInfo.h"
1617#include " EVMTargetMachine.h"
2223#include " llvm/ADT/StringSet.h"
2324#include " llvm/BinaryFormat/ELF.h"
2425#include " llvm/CodeGen/AsmPrinter.h"
26+ #include " llvm/IR/GlobalVariable.h"
27+ #include " llvm/IR/Module.h"
2528#include " llvm/MC/MCAsmInfo.h"
2629#include " llvm/MC/MCContext.h"
2730#include " llvm/MC/MCInst.h"
@@ -49,6 +52,16 @@ class EVMAsmPrinter : public AsmPrinter {
4952 StringSet<> WideRelocSymbolsSet;
5053 StringMap<unsigned > ImmutablesMap;
5154
55+ // Contains constant global variable initializers in address space AS_CODE,
56+ // which are concatenated into a single block. Duplicate initializers and
57+ // those that are substrings of others are removed. This "data section"
58+ // is emitted at the end of the .text section.
59+ std::string DataSectionBuffer;
60+ MCSymbol *DataSectionSymbol = nullptr ;
61+ // Maps each global variable symbol to the offset within the data section
62+ // where its corresponding initializer is located.
63+ DenseMap<const MCSymbol *, uint64_t > GlobSymbolToOffsetMap;
64+
5265 // True if there is a function that pushes deploy address.
5366 bool ModuleHasPushDeployAddress = false ;
5467
@@ -68,14 +81,20 @@ class EVMAsmPrinter : public AsmPrinter {
6881
6982 void emitEndOfAsmFile (Module &) override ;
7083
84+ void emitStartOfAsmFile (Module &) override ;
85+
7186 void emitFunctionBodyStart () override ;
87+
7288 void emitFunctionBodyEnd () override ;
7389
90+ void emitGlobalVariable (const GlobalVariable *GV) override ;
91+
7492private:
7593 void emitAssemblySymbol (const MachineInstr *MI);
7694 void emitWideRelocatableSymbol (const MachineInstr *MI);
7795 void emitLoadImmutableLabel (const MachineInstr *MI);
7896 void emitJumpDest ();
97+ void createDataSectionBuffer (const Module &M);
7998};
8099} // end of anonymous namespace
81100
@@ -221,7 +240,7 @@ void EVMAsmPrinter::emitInstruction(const MachineInstr *MI) {
221240 }
222241
223242 MCInst TmpInst;
224- MCInstLowering.Lower (MI, TmpInst);
243+ MCInstLowering.Lower (MI, TmpInst, GlobSymbolToOffsetMap, DataSectionSymbol );
225244 EmitToStreamer (*OutStreamer, TmpInst);
226245}
227246
@@ -328,8 +347,49 @@ void EVMAsmPrinter::emitWideRelocatableSymbol(const MachineInstr *MI) {
328347 OutStreamer->switchSection (CurrentSection);
329348}
330349
331- void EVMAsmPrinter::emitEndOfAsmFile (Module &) {
350+ void EVMAsmPrinter::createDataSectionBuffer (const Module &M) {
351+ SmallVector<std::pair<const GlobalVariable *, StringRef>, 16 > Globals;
352+ for (const GlobalVariable &GV : M.globals ()) {
353+ if (GV.getAddressSpace () != EVMAS::AS_CODE || !GV.hasInitializer ())
354+ continue ;
355+
356+ const auto *CV = dyn_cast<ConstantDataSequential>(GV.getInitializer ());
357+ if (!CV)
358+ continue ;
359+
360+ Globals.emplace_back (&GV, CV->getRawDataValues ());
361+ }
362+ // Sort global variables in descending order based on the size of their
363+ // initializers.
364+ stable_sort (Globals, [](const auto &A, const auto &B) {
365+ return A.second .size () > B.second .size ();
366+ });
367+
368+ // Construct the data section by concatenating unique initializers,
369+ // eliminating duplicates, and excluding any initializer that is a
370+ // substring of another.
371+ // NOTE: Rather than simply concatenating unique strings, we could attempt
372+ // to compute the Shortest Common Superstring by allowing partial overlaps
373+ // between strings. Although this is an NP-hard problem, we could explore
374+ // an approximate greedy solution. Consider this approach if there are
375+ // real programs that could benefit from the optimization.
376+ DataSectionBuffer.clear ();
377+ raw_string_ostream Stream (DataSectionBuffer);
378+ for (const auto &[_, Init] : Globals)
379+ if (!StringRef (DataSectionBuffer).contains (Init))
380+ Stream << Init;
381+
382+ // Compute offsets of each global initializer in the data section.
383+ StringRef DataView (DataSectionBuffer);
384+ for (const auto &[GV, Init] : Globals) {
385+ size_t Offset = DataView.find (Init);
386+ assert (Offset != StringRef::npos &&
387+ " Initializer not found in data section" );
388+ GlobSymbolToOffsetMap[getSymbol (GV)] = Offset;
389+ }
390+ }
332391
392+ void EVMAsmPrinter::emitEndOfAsmFile (Module &) {
333393 // The deploy and runtime code must end with INVALID instruction to
334394 // comply with 'solc'. To ensure this, we append an INVALID
335395 // instruction at the end of the .text section.
@@ -346,8 +406,14 @@ void EVMAsmPrinter::emitEndOfAsmFile(Module &) {
346406 TM.getTargetFeatureString ()));
347407
348408 OutStreamer->emitInstruction (MCI, *STI);
409+
410+ // Emit constants to the code.
411+ OutStreamer->emitLabel (DataSectionSymbol);
412+ OutStreamer->emitBinaryData (DataSectionBuffer);
413+
349414 OutStreamer->popSection ();
350415
416+ GlobSymbolToOffsetMap.clear ();
351417 WideRelocSymbolsSet.clear ();
352418 ImmutablesMap.clear ();
353419 ModuleHasPushDeployAddress = false ;
@@ -360,6 +426,20 @@ void EVMAsmPrinter::emitJumpDest() {
360426 EmitToStreamer (*OutStreamer, JumpDest);
361427}
362428
429+ void EVMAsmPrinter::emitStartOfAsmFile (Module &M) {
430+ createDataSectionBuffer (M);
431+ DataSectionSymbol = OutContext.getOrCreateSymbol (" code_data_section" );
432+ }
433+
434+ void EVMAsmPrinter::emitGlobalVariable (const GlobalVariable *GV) {
435+ // Constant arrays are handled above.
436+ if (GV->getAddressSpace () == EVMAS::AS_CODE && GV->hasInitializer ())
437+ if (isa<ConstantDataSequential>(GV->getInitializer ()))
438+ return ;
439+
440+ AsmPrinter::emitGlobalVariable (GV);
441+ }
442+
363443extern " C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMAsmPrinter () {
364444 const RegisterAsmPrinter<EVMAsmPrinter> X (getTheEVMTarget ());
365445}
0 commit comments