Skip to content

Commit 876ffb6

Browse files
committed
[EVM] Add LLVMLinkEVM C-API
Please note that this is a temporary patch. It adds initial support for dependencies, but it does not work in the general case. A full solution will be more sophisticated and will likely be implemented on the FE driver side, without the need to use LLD.
1 parent dcc54ed commit 876ffb6

File tree

6 files changed

+235
-8
lines changed

6 files changed

+235
-8
lines changed

lld/include/lld-c/LLDAsLibraryC.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,32 @@ char **LLVMGetUndefinedLinkerSymbolsEraVM(LLVMMemoryBufferRef inBuffer,
123123
* LLVMGetUndefinedSymbolsEraVM(). */
124124
void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
125125
uint64_t numLinkerSymbols);
126+
127+
/** Links the deploy and runtime ELF object files using the information about
128+
* dependencies.
129+
* \p inBuffers - array of input memory buffers with following structure:
130+
*
131+
* inBuffers[0] - deploy ELF object code
132+
* inBuffers[1] - deployed (runtime) ELF object code
133+
* --------------------------
134+
* inBuffers[2] - 1-st sub-contract (final EVM bytecode)
135+
* ...
136+
* inBuffers[N + 1] - N-th sub-contract (final EVM bytecode)
137+
*
138+
* Sub-contracts are optional. They should have the same ordering as in
139+
* the YUL layout.
140+
*
141+
* \p inBuffersIDs - array of string identifiers of the buffers. IDs correspond
142+
* to the object names in the YUL layout.
143+
* On success, outBuffers[0] will contain the deploy bytecode and outBuffers[1]
144+
* the runtime bytecode.
145+
* In case of an error the function returns 'true' and the error message is
146+
* passed in \p errorMessage. The message should be disposed by
147+
* 'LLVMDisposeMessage'. */
148+
LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef *inBuffers, const char *inBuffersIDs[],
149+
uint64_t numInBuffers, LLVMMemoryBufferRef outBuffers[2],
150+
char **errorMessage);
151+
126152
LLVM_C_EXTERN_C_END
127153

128154
#endif // LLD_C_LLDASLIBRARYC_H

lld/lld-c/LLDAsLibraryC.cpp

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,3 +443,199 @@ void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
443443
std::free(linkerSymbolNames[idx]);
444444
std::free(linkerSymbolNames);
445445
}
446+
447+
//----------------------------------------------------------------------------//
448+
449+
/// This function generates a linker script for EVM architecture.
450+
/// \p memBufs - array of input memory buffers with following structure:
451+
///
452+
/// memBufs[0] - deploy object code
453+
/// memBufs[1] - deployed object code
454+
/// --------------------------
455+
/// memBufs[2] - 1-st sub-contract (final EVM bytecode)
456+
/// ...
457+
/// memBufs[N + 1] - N-th sub-contract (final EVM bytecode)
458+
///
459+
/// Sub-contracts are optional. They should have the same ordering as in
460+
/// the YUL layout.
461+
///
462+
/// \p bufIDs - array of string identifiers of the buffers. IDs correspond
463+
/// to the object names in the YUL layout.
464+
///
465+
/// For example, the YUL object:
466+
///
467+
/// |--D_105_deploy --||--D_105_deployed --||-- B_40 --|
468+
///
469+
/// __datasize_B_40 = 1384;
470+
/// SECTIONS {
471+
/// . = 0;
472+
/// .code : SUBALIGN(1) {
473+
/// D_105(.text);
474+
/// __dataoffset_D_105_deployed = .;
475+
/// D_105_deployed(.text);
476+
/// __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
477+
/// __dataoffset_B_40 = .;
478+
/// __datasize_D_105 = __dataoffset_B_40 + __datasize_B_40;
479+
/// LONG(__dataoffset_D_105_deployed);
480+
/// }
481+
///
482+
/// The dot '.' denotes current location in the resulting file.
483+
/// The purpose of the script is to define datasize/dataoffset absolute symbols
484+
/// that reflect the YUL layout.
485+
/// Builds the text of the linker script used to link an EVM contract.
///
/// \p memBufs - input buffers: [0] deploy object, [1] deployed object,
///              [2..] optional dependency (sub-contract) bytecode.
/// \p bufIDs  - identifiers of the buffers, one per entry of \p memBufs;
///              they are used both as input file names in the script and as
///              suffixes of the __datasize_* / __dataoffset_* symbols.
/// \returns the complete linker script as a string.
///
/// NOTE: llvm::Twine only references its operands, so a Twine must never
/// outlive the full expression that created it. Every fragment below is
/// therefore converted to std::string immediately instead of being kept in a
/// Twine variable.
static std::string creteEVMLinkerScript(ArrayRef<LLVMMemoryBufferRef> memBufs,
                                        ArrayRef<const char *> bufIDs) {
  assert(memBufs.size() == bufIDs.size());
  assert(bufIDs.size() >= 2 && "deploy and deployed objects are required");

  const size_t numObjectsToLink = memBufs.size();
  const bool hasDependencies = numObjectsToLink > 2;
  const StringRef dataSizePrefix("__datasize_");
  const StringRef dataOffsetPrefix("__dataoffset_");

  // Names of the {deploy, deployed} pair of the top-level contract.
  const StringRef topName(bufIDs[0]);
  const StringRef deployed(bufIDs[1]);

  // Script part that places the top-level contract, e.g.:
  //   D_105(.text);
  //   __dataoffset_D_105_deployed = .;
  //   D_105_deployed(.text);
  //   __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
  const std::string topLevel =
      (topName + "(.text);\n" + dataOffsetPrefix + deployed + " = .;\n" +
       deployed + "(.text);\n" + dataSizePrefix + deployed + " = . - " +
       dataOffsetPrefix + deployed + ";\n")
          .str();

  // symDatasizeDeps:   sizes of the dependencies, e.g.
  //   __datasize_B_40 = 1384;
  // symDataOffsetDeps: offsets of the dependencies, e.g.
  //   __dataoffset_B_40 = .;
  // Dependencies start right after the {deploy, deployed} pair, i.e. at
  // index 2. The first one is located at the current position; each following
  // one starts where the previous one ends.
  std::string symDatasizeDeps;
  std::string symDataOffsetDeps;
  for (size_t idx = 2; idx < numObjectsToLink; ++idx) {
    symDatasizeDeps += (dataSizePrefix + bufIDs[idx] + " = " +
                        Twine(LLVMGetBufferSize(memBufs[idx])) + ";\n")
                           .str();

    if (idx == 2)
      symDataOffsetDeps += (dataOffsetPrefix + bufIDs[idx] + " = .;\n").str();
    else
      symDataOffsetDeps +=
          (dataOffsetPrefix + bufIDs[idx] + " = " + dataOffsetPrefix +
           bufIDs[idx - 1] + " + " + dataSizePrefix + bufIDs[idx - 1] + ";\n")
              .str();
  }

  // Total size of the top-level contract including all the dependencies:
  // the end of the last dependency, or the current position if there are none.
  std::string symDatasizeTop = (dataSizePrefix + topName + " = ").str();
  if (hasDependencies)
    symDatasizeTop += (dataOffsetPrefix + bufIDs.back() + " + " +
                       dataSizePrefix + bufIDs.back() + ";\n")
                          .str();
  else
    symDatasizeTop += ".;\n";

  // Emit the offset of the deployed code as a 4-byte unsigned integer at the
  // end of the section, so the caller can split the linked binary.
  const std::string deploySize =
      ("LONG(" + dataOffsetPrefix + deployed + ");\n").str();

  // Assemble the final script. Plain appends are used (rather than formatv)
  // so that the literal '{' / '}' of the script need no escaping.
  std::string script;
  script += symDatasizeDeps;
  script += "\n"
            "ENTRY(0);\n"
            "SECTIONS {\n"
            ". = 0;\n"
            ".code : SUBALIGN(1) {\n";
  script += topLevel;
  script += symDataOffsetDeps;
  script += symDatasizeTop;
  script += deploySize;
  script += "}\n"
            "}\n";
  return script;
}
563+
564+
/// Links the deploy and deployed (runtime) EVM object files.
///
/// \p inBuffers    - [0] deploy object, [1] deployed object, [2..] optional
///                   dependency bytecode (only used to build the linker
///                   script symbols).
/// \p inBuffersIDs - identifiers of the buffers, one per input buffer.
/// \p numInBuffers - number of entries in both arrays; must be at least 2.
/// \p outBuffers   - on success, [0] receives the deploy bytecode and [1] the
///                   deployed bytecode.
/// \p errorMessage - on failure, receives a strdup-allocated message.
/// \returns false on success, true on error.
LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef inBuffers[],
                     const char *inBuffersIDs[], uint64_t numInBuffers,
                     LLVMMemoryBufferRef outBuffers[2], char **errorMessage) {
  // Reports a failure through the C API convention used by this function.
  auto fail = [errorMessage](const char *msg) -> LLVMBool {
    if (errorMessage)
      *errorMessage = strdup(msg);
    return true;
  };

  // Checked at run time as well: asserts are compiled out in release builds
  // and the code below unconditionally reads entries 0 and 1.
  if (numInBuffers < 2)
    return fail("LLVMLinkEVM expects at least the deploy and deployed objects");

  // Inputs passed to the linker: deploy object, deployed object and the
  // generated linker script. The objects are re-wrapped so that their buffer
  // names match the IDs referenced from the script.
  SmallVector<MemoryBufferRef> localInMemBufRefs(3);
  SmallVector<std::unique_ptr<MemoryBuffer>> localInMemBufs(3);
  for (unsigned idx = 0; idx < 2; ++idx) {
    MemoryBufferRef ref = *unwrap(inBuffers[idx]);
    localInMemBufs[idx] =
        MemoryBuffer::getMemBuffer(ref.getBuffer(), inBuffersIDs[idx],
                                   /*RequiresNullTerminator*/ false);
    localInMemBufRefs[idx] = localInMemBufs[idx]->getMemBufferRef();
  }

  std::string linkerScript = creteEVMLinkerScript(
      ArrayRef(inBuffers, numInBuffers), ArrayRef(inBuffersIDs, numInBuffers));
  std::unique_ptr<MemoryBuffer> scriptBuf =
      MemoryBuffer::getMemBuffer(linkerScript, "script.x");
  localInMemBufRefs[2] = scriptBuf->getMemBufferRef();

  SmallVector<const char *, 16> lldArgs;
  lldArgs.push_back("ld.lld");
  lldArgs.push_back("-T");
  lldArgs.push_back("script.x");

  // Use remapping of file names (a linker feature) to replace file names with
  // indexes in the array of memory buffers. The strings must stay alive until
  // the linker has run because lldArgs stores raw pointers into them.
  const StringRef remapPrefix("--remap-inputs=");
  const std::string remapDeployStr =
      (remapPrefix + inBuffersIDs[0] + "=0").str();
  lldArgs.push_back(remapDeployStr.c_str());

  const std::string remapDeployedStr =
      (remapPrefix + inBuffersIDs[1] + "=1").str();
  lldArgs.push_back(remapDeployedStr.c_str());

  lldArgs.push_back("--remap-inputs=script.x=2");

  // Deploy code
  lldArgs.push_back(inBuffersIDs[0]);
  // Deployed code
  lldArgs.push_back(inBuffersIDs[1]);

  lldArgs.push_back("--oformat=binary");

  SmallString<0> codeString;
  raw_svector_ostream ostream(codeString);
  SmallString<0> errorString;
  raw_svector_ostream errorOstream(errorString);

  // Lld-as-a-library is not thread safe, as it has a global state,
  // so we need to protect lld from simultaneous access from different threads.
  std::unique_lock<std::mutex> lock(lldMutex);
  const lld::Result s =
      lld::lldMainMemBuf(localInMemBufRefs, &ostream, lldArgs, outs(),
                         errorOstream, {{lld::Gnu, &lld::elf::linkMemBuf}});
  lock.unlock();

  const bool linked = !s.retCode && s.canRunAgain;
  if (!linked)
    return fail(errorString.c_str());

  StringRef data = ostream.str();
  // The linker script appends the offset of the deployed code as a 4-byte BE
  // unsigned integer to the end of the .code section. Knowing this, we can
  // split the image into the final deploy and deployed codes.
  constexpr size_t trailerSize = 4;
  if (data.size() <= trailerSize)
    return fail("linked EVM image is too small to contain the deploy size");

  const size_t codeSize = data.size() - trailerSize;
  const size_t deploySize = support::endian::read32be(data.data() + codeSize);
  // Guard against a trailer pointing past the code: otherwise the subtraction
  // below would wrap around and the copies would read out of bounds.
  if (deploySize > codeSize)
    return fail("linked EVM image has an invalid deploy code size");
  const size_t deployedSize = codeSize - deploySize;

  outBuffers[0] = LLVMCreateMemoryBufferWithMemoryRangeCopy(
      data.data(), deploySize, "deploy");
  outBuffers[1] = LLVMCreateMemoryBufferWithMemoryRangeCopy(
      data.data() + deploySize, deployedSize, "deployed");

  return false;
}

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ enum {
431431

432432
// EVM local begin
433433
// ELF Relocation types for EVM
434-
enum {
434+
enum : uint8_t {
435435
#include "ELFRelocs/EVM.def"
436436
};
437437
// EVM local end

llvm/lib/Object/ELF.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
185185
case ELF::EM_ERAVM:
186186
switch (Type) {
187187
#include "llvm/BinaryFormat/ELFRelocs/EraVM.def"
188-
default:
188+
default:
189189
break;
190190
}
191191
break;
@@ -251,10 +251,6 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) {
251251
break;
252252
case ELF::EM_LOONGARCH:
253253
return ELF::R_LARCH_RELATIVE;
254-
// EVM local begin
255-
case ELF::EM_EVM:
256-
break;
257-
// EVM local end
258254
default:
259255
break;
260256
}

llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,22 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "MCTargetDesc/EVMTargetStreamer.h"
14+
#include "llvm/BinaryFormat/ELF.h"
15+
#include "llvm/MC/MCSymbolELF.h"
16+
#include "llvm/Support/Casting.h"
1417

1518
using namespace llvm;
1619

1720
// EVMTargetStreamer implementations
1821

1922
EVMTargetStreamer::EVMTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
2023

21-
EVMTargetStreamer::~EVMTargetStreamer() = default;
24+
/// Label hook: forces STB_LOCAL binding on \p Symbol.
void EVMTargetStreamer::emitLabel(MCSymbol *Symbol) {
  // Workaround for the current linking scheme: mark every symbol as local to
  // its translation unit.
  cast<MCSymbolELF>(Symbol)->setBinding(ELF::STB_LOCAL);
}
2230

2331
EVMTargetObjStreamer::EVMTargetObjStreamer(MCStreamer &S)
2432
: EVMTargetStreamer(S) {}

llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ class EVMTargetStreamer : public MCTargetStreamer {
2424
EVMTargetStreamer(EVMTargetStreamer &&) = delete;
2525
EVMTargetStreamer &operator=(const EVMTargetStreamer &) = delete;
2626
EVMTargetStreamer &operator=(EVMTargetStreamer &&) = delete;
27-
~EVMTargetStreamer() override;
27+
28+
void emitLabel(MCSymbol *Symbol) override;
2829
};
2930

3031
/// This part is for ASCII assembly output

0 commit comments

Comments
 (0)