@@ -443,3 +443,199 @@ void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
443443 std::free (linkerSymbolNames[idx]);
444444 std::free (linkerSymbolNames);
445445}
446+
447+ // ----------------------------------------------------------------------------//
448+
449+ // / This function generates a linker script for EVM architecture.
450+ // / \p memBufs - array of input memory buffers with following structure:
451+ // /
452+ // / memBufs[0] - deploy object code
453+ // / memBufs[1] - deployed object code
454+ // / --------------------------
455+ // / memBufs[2] - 1-st sub-contract (final EVM bytecode)
456+ // / ...
457+ // / memBufs[N] - N-st sub-contract (final EVM bytecode)
458+ // /
459+ // / Sub-contracts are optional. They should have the same ordering as in
460+ // / the YUL layout.
461+ // /
462+ // / \p bufIDs - array of string identifiers of the buffers. IDs correspond
463+ // / to the object names in the YUL layout.
464+ // /
465+ // / For example, the YUL object:
466+ // /
467+ // / |--D_105_deploy --||--D_105_deployed --||-- B_40 --|
468+ // /
469+ // / __datasize_B_40 = 1384;
470+ // / SECTIONS {
471+ // / . = 0;
472+ // / .text : SUBALIGN(1) {
473+ // / D_105(.text);
474+ // / __dataoffset_D_105_deployed = .;
475+ // / D_105_deployed(.text);
476+ // / __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
477+ // / __dataoffset_B_40 = .;
478+ // / __datasize_D_105 = __dataoffset_B_40 + __datasize_B_40;
479+ // / LONG(__dataoffset_D_105_deployed);
480+ // / }
481+ // /
482+ // / The dot '.' denotes current location in the resulting file.
483+ // / The purpose of the script is to define datasize/dataoffset absolute symbols
484+ // / that reflect the YUL layout.
485+ static std::string creteEVMLinkerScript (ArrayRef<LLVMMemoryBufferRef> memBufs,
486+ ArrayRef<const char *> bufIDs) {
487+ assert (memBufs.size () == bufIDs.size ());
488+ size_t numObjectsToLink = memBufs.size ();
489+ StringRef dataSizePrefix (" __datasize_" );
490+ StringRef dataOffsetPrefix (" __dataoffset_" );
491+
492+ // Define the script part related to the top-level contract.
493+ StringRef topName (bufIDs[0 ]);
494+ StringRef deployed (bufIDs[1 ]);
495+
496+ // Contains the linker script part corresponding to the top-level contract.
497+ // For the example above, this contains:
498+ // D_105(.text);
499+ // __dataoffset_D_105_deployed = .;
500+ // D_105_deployed(.text);
501+ // __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
502+ Twine topLevel = topName + " (.text);\n " + dataOffsetPrefix + deployed +
503+ " = .;\n " + deployed + " (.text);\n " + dataSizePrefix +
504+ deployed + " = . - " + dataOffsetPrefix + deployed + " ;\n " ;
505+
506+ // Contains symbols whose values are the sizes of the dependent contracts.
507+ // For the example above, this contains:
508+ // __datasize_B_40 = 1384;
509+ std::string symDatasizeDeps;
510+
511+ // Contains symbols whose values are the offsets of the dependent contracts.
512+ // For the example above, this contains:
513+ // __dataoffset_B_40 = .;
514+ std::string symDataOffsetDeps;
515+ if (numObjectsToLink > 2 ) {
516+ // Define datasize symbols for the dependent contracts. They start after
517+ // {deploy, deployed} pair of the top-level contract, i.e. at index 2.
518+ for (unsigned idx = 2 ; idx < numObjectsToLink; ++idx)
519+ symDatasizeDeps += (dataSizePrefix + bufIDs[idx] + " = " +
520+ Twine (LLVMGetBufferSize (memBufs[idx])) + " ;\n " )
521+ .str ();
522+
523+ symDataOffsetDeps = (dataOffsetPrefix + bufIDs[2 ] + " = .;\n " ).str ();
524+ for (unsigned idx = 3 ; idx < numObjectsToLink; ++idx)
525+ symDataOffsetDeps +=
526+ (dataOffsetPrefix + bufIDs[idx] + " = " + dataOffsetPrefix +
527+ bufIDs[idx - 1 ] + " + " + dataSizePrefix + bufIDs[idx - 1 ] + " ;\n " )
528+ .str ();
529+ }
530+
531+ // Contains a symbol whose value is the total size of the top-level contract
532+ // with all the dependencies.
533+ std::string symDatasizeTop = (dataSizePrefix + topName + " = " ).str ();
534+ if (numObjectsToLink > 2 )
535+ symDatasizeTop += (dataOffsetPrefix + bufIDs.back () + " + " +
536+ dataSizePrefix + bufIDs.back () + " ;\n " )
537+ .str ();
538+ else
539+ symDatasizeTop += " .;\n " ;
540+
541+ // Emit size of the deploy code offset as the 4-byte unsigned integer.
542+ // This is needed to determine which offset the deployed code starts at
543+ // in the linked binary.
544+ Twine deploySize = " LONG(" + dataOffsetPrefix + deployed + " );\n " ;
545+
546+ Twine script = formatv (" {0}\n \
547+ ENTRY(0);\n \
548+ SECTIONS {\n \
549+ . = 0;\n \
550+ .code : SUBALIGN(1) {\n \
551+ {1}\
552+ {2}\
553+ {3}\
554+ {4}\
555+ }\n \
556+ }\n \
557+ " ,
558+ symDatasizeDeps, topLevel, symDataOffsetDeps,
559+ symDatasizeTop, deploySize);
560+
561+ return script.str ();
562+ }
563+
564+ LLVMBool LLVMLinkEVM (LLVMMemoryBufferRef inBuffers[],
565+ const char *inBuffersIDs[], uint64_t numInBuffers,
566+ LLVMMemoryBufferRef outBuffers[2 ], char **errorMessage) {
567+ assert (numInBuffers > 1 );
568+ SmallVector<MemoryBufferRef> localInMemBufRefs (3 );
569+ SmallVector<std::unique_ptr<MemoryBuffer>> localInMemBufs (3 );
570+ for (unsigned idx = 0 ; idx < 2 ; ++idx) {
571+ MemoryBufferRef ref = *unwrap (inBuffers[idx]);
572+ localInMemBufs[idx] =
573+ MemoryBuffer::getMemBuffer (ref.getBuffer (), inBuffersIDs[idx],
574+ /* RequiresNullTerminator*/ false );
575+ localInMemBufRefs[idx] = localInMemBufs[idx]->getMemBufferRef ();
576+ }
577+
578+ std::string linkerScript = creteEVMLinkerScript (
579+ ArrayRef (inBuffers, numInBuffers), ArrayRef (inBuffersIDs, numInBuffers));
580+ std::unique_ptr<MemoryBuffer> scriptBuf =
581+ MemoryBuffer::getMemBuffer (linkerScript, " script.x" );
582+ localInMemBufRefs[2 ] = scriptBuf->getMemBufferRef ();
583+
584+ SmallVector<const char *, 16 > lldArgs;
585+ lldArgs.push_back (" ld.lld" );
586+ lldArgs.push_back (" -T" );
587+ lldArgs.push_back (" script.x" );
588+
589+ // Use remapping of file names (a linker feature) to replace file names with
590+ // indexes in the array of memory buffers.
591+ Twine remapStr (" --remap-inputs=" );
592+ std::string remapDeployStr = (remapStr + inBuffersIDs[0 ] + " =0" ).str ();
593+ lldArgs.push_back (remapDeployStr.c_str ());
594+
595+ std::string remapDeployedStr = (remapStr + inBuffersIDs[1 ] + " =1" ).str ();
596+ lldArgs.push_back (remapDeployedStr.c_str ());
597+
598+ lldArgs.push_back (" --remap-inputs=script.x=2" );
599+
600+ // Deploy code
601+ lldArgs.push_back (inBuffersIDs[0 ]);
602+ // Deployed code
603+ lldArgs.push_back (inBuffersIDs[1 ]);
604+
605+ lldArgs.push_back (" --oformat=binary" );
606+
607+ SmallString<0 > codeString;
608+ raw_svector_ostream ostream (codeString);
609+ SmallString<0 > errorString;
610+ raw_svector_ostream errorOstream (errorString);
611+
612+ // Lld-as-a-library is not thread safe, as it has a global state,
613+ // so we need to protect lld from simultaneous access from different threads.
614+ std::unique_lock<std::mutex> lock (lldMutex);
615+ const lld::Result s =
616+ lld::lldMainMemBuf (localInMemBufRefs, &ostream, lldArgs, outs (),
617+ errorOstream, {{lld::Gnu, &lld::elf::linkMemBuf}});
618+ lock.unlock ();
619+
620+ bool ret = !s.retCode && s.canRunAgain ;
621+ if (!ret) {
622+ *errorMessage = strdup (errorString.c_str ());
623+ return true ;
624+ }
625+
626+ StringRef data = ostream.str ();
627+ // Linker script adds size of the deploy code as a 8-byte BE unsigned to the
628+ // end of .text section. Knowing this, we can extract final deploy and
629+ // deployed codes.
630+ assert (data.size () > 4 );
631+ size_t deploySize = support::endian::read32be (data.data () + data.size () - 4 );
632+ assert (deploySize < data.size ());
633+ size_t deployedSize = data.size () - deploySize - 4 ;
634+
635+ outBuffers[0 ] = LLVMCreateMemoryBufferWithMemoryRangeCopy (
636+ data.data (), deploySize, " deploy" );
637+ outBuffers[1 ] = LLVMCreateMemoryBufferWithMemoryRangeCopy (
638+ data.data () + deploySize, deployedSize, " deployed" );
639+
640+ return false ;
641+ }
0 commit comments