@@ -53,6 +53,54 @@ using namespace solidity::evmasm;
5353using namespace solidity ::langutil;
5454using namespace solidity ::util;
5555
56+ namespace
57+ {
58+
59+ // / Produces instruction location info in RAII style. When an assembly instruction is added to the bytecode,
60+ // / this class can be instantiated in that scope. It will record the current bytecode size (before addition)
61+ // / and, at destruction time, record the new bytecode size. This information is then added to an external
62+ // / instruction locations vector.
63+ // / If the instruction decomposes into multiple individual evm instructions, `emit` can be
64+ // / called for all but the last one (which will be emitted by the destructor).
65+ class InstructionLocationEmitter
66+ {
67+ public:
68+ InstructionLocationEmitter (
69+ std::vector<LinkerObject::InstructionLocation>& _instructionLocations,
70+ bytes const & _bytecode,
71+ size_t const _assemblyItemIndex
72+ ):
73+ m_instructionLocations (_instructionLocations),
74+ m_bytecode (_bytecode),
75+ m_assemblyItemIndex (_assemblyItemIndex),
76+ m_instructionLocationStart (_bytecode.size())
77+ {}
78+
79+ ~InstructionLocationEmitter ()
80+ {
81+ emit ();
82+ }
83+
84+ void emit ()
85+ {
86+ auto const end = m_bytecode.size ();
87+ m_instructionLocations.push_back (LinkerObject::InstructionLocation{
88+ .start = m_instructionLocationStart,
89+ .end = end,
90+ .assemblyItemIndex = m_assemblyItemIndex
91+ });
92+ m_instructionLocationStart = end;
93+ }
94+
95+ private:
96+ std::vector<LinkerObject::InstructionLocation>& m_instructionLocations;
97+ bytes const & m_bytecode;
98+ size_t const m_assemblyItemIndex{};
99+ size_t m_instructionLocationStart{};
100+ };
101+
102+ }
103+
56104std::map<std::string, std::shared_ptr<std::string const >> Assembly::s_sharedSourceNames;
57105
58106AssemblyItem const & Assembly::append (AssemblyItem _i)
@@ -1281,104 +1329,72 @@ LinkerObject const& Assembly::assembleLegacy() const
12811329 uint8_t dataRefPush = static_cast <uint8_t >(pushInstruction (bytesPerDataRef));
12821330
12831331 LinkerObject::CodeSectionLocation codeSectionLocation;
1332+ codeSectionLocation.instructionLocations .reserve (items.size ());
12841333 codeSectionLocation.start = 0 ;
1285- size_t assemblyItemIndex = 0 ;
1286- auto assembleInstruction = [&](auto && _addInstruction) {
1287- size_t start = ret.bytecode .size ();
1288- _addInstruction ();
1289- size_t end = ret.bytecode .size ();
1290- codeSectionLocation.instructionLocations .emplace_back (
1291- LinkerObject::InstructionLocation{
1292- .start = start,
1293- .end = end,
1294- .assemblyItemIndex = assemblyItemIndex
1295- }
1296- );
1297- };
1298- for (AssemblyItem const & item: items)
1334+ for (auto const & [assemblyItemIndex, item]: items | ranges::views::enumerate)
12991335 {
1336+ // collect instruction locations via side effects
1337+ InstructionLocationEmitter instructionLocationEmitter (codeSectionLocation.instructionLocations , ret.bytecode , assemblyItemIndex);
13001338 // store position of the invalid jump destination
13011339 if (item.type () != Tag && m_tagPositionsInBytecode[0 ] == std::numeric_limits<size_t >::max ())
13021340 m_tagPositionsInBytecode[0 ] = ret.bytecode .size ();
13031341
13041342 switch (item.type ())
13051343 {
13061344 case Operation:
1307- assembleInstruction ([&](){
1308- ret.bytecode += assembleOperation (item);
1309- });
1345+ ret.bytecode += assembleOperation (item);
13101346 break ;
13111347 case Push:
1312- assembleInstruction ([&](){
1313- ret.bytecode += assemblePush (item);
1314- });
1348+ ret.bytecode += assemblePush (item);
13151349 break ;
13161350 case PushTag:
1317- {
1318- assembleInstruction ([&](){
1319- ret.bytecode .push_back (tagPush);
1320- tagRefs[ret.bytecode .size ()] = item.splitForeignPushTag ();
1321- ret.bytecode .resize (ret.bytecode .size () + bytesPerTag);
1322- });
1351+ ret.bytecode .push_back (tagPush);
1352+ tagRefs[ret.bytecode .size ()] = item.splitForeignPushTag ();
1353+ ret.bytecode .resize (ret.bytecode .size () + bytesPerTag);
13231354 break ;
1324- }
13251355 case PushData:
1326- assembleInstruction ([&]() {
1327- ret.bytecode .push_back (dataRefPush);
1328- dataRefs.insert (std::make_pair (h256 (item.data ()), ret.bytecode .size ()));
1329- ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1330- });
1356+ ret.bytecode .push_back (dataRefPush);
1357+ dataRefs.insert (std::make_pair (h256 (item.data ()), ret.bytecode .size ()));
1358+ ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
13311359 break ;
13321360 case PushSub:
1333- assembleInstruction ([&]() {
1334- assertThrow (item.data () <= std::numeric_limits<size_t >::max (), AssemblyException, " " );
1335- ret.bytecode .push_back (dataRefPush);
1336- subRefs.insert (std::make_pair (static_cast <size_t >(item.data ()), ret.bytecode .size ()));
1337- ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1338- });
1361+ assertThrow (item.data () <= std::numeric_limits<size_t >::max (), AssemblyException, " " );
1362+ ret.bytecode .push_back (dataRefPush);
1363+ subRefs.insert (std::make_pair (static_cast <size_t >(item.data ()), ret.bytecode .size ()));
1364+ ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
13391365 break ;
13401366 case PushSubSize:
13411367 {
1342- assembleInstruction ([&](){
1343- assertThrow (item.data () <= std::numeric_limits<size_t >::max (), AssemblyException, " " );
1344- auto s = subAssemblyById (static_cast <size_t >(item.data ()))->assemble ().bytecode .size ();
1345- item.setPushedValue (u256 (s));
1346- unsigned b = std::max<unsigned >(1 , numberEncodingSize (s));
1347- ret.bytecode .push_back (static_cast <uint8_t >(pushInstruction (b)));
1348- ret.bytecode .resize (ret.bytecode .size () + b);
1349- bytesRef byr (&ret.bytecode .back () + 1 - b, b);
1350- toBigEndian (s, byr);
1351- });
1368+ assertThrow (item.data () <= std::numeric_limits<size_t >::max (), AssemblyException, " " );
1369+ auto s = subAssemblyById (static_cast <size_t >(item.data ()))->assemble ().bytecode .size ();
1370+ item.setPushedValue (u256 (s));
1371+ unsigned b = std::max<unsigned >(1 , numberEncodingSize (s));
1372+ ret.bytecode .push_back (static_cast <uint8_t >(pushInstruction (b)));
1373+ ret.bytecode .resize (ret.bytecode .size () + b);
1374+ bytesRef byr (&ret.bytecode .back () + 1 - b, b);
1375+ toBigEndian (s, byr);
13521376 break ;
13531377 }
13541378 case PushProgramSize:
1355- {
1356- assembleInstruction ([&](){
1357- ret.bytecode .push_back (dataRefPush);
1358- sizeRefs.push_back (static_cast <unsigned >(ret.bytecode .size ()));
1359- ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
1360- });
1379+ ret.bytecode .push_back (dataRefPush);
1380+ sizeRefs.push_back (static_cast <unsigned >(ret.bytecode .size ()));
1381+ ret.bytecode .resize (ret.bytecode .size () + bytesPerDataRef);
13611382 break ;
1362- }
13631383 case PushLibraryAddress:
13641384 {
1365- assembleInstruction ([&]() {
1366- auto const [bytecode, linkRef] = assemblePushLibraryAddress (item, ret.bytecode .size ());
1367- ret.bytecode += bytecode;
1368- ret.linkReferences .insert (linkRef);
1369- });
1385+ auto const [bytecode, linkRef] = assemblePushLibraryAddress (item, ret.bytecode .size ());
1386+ ret.bytecode += bytecode;
1387+ ret.linkReferences .insert (linkRef);
13701388 break ;
13711389 }
13721390 case PushImmutable:
1373- assembleInstruction ([&]() {
1374- ret.bytecode .push_back (static_cast <uint8_t >(Instruction::PUSH32));
1375- // Maps keccak back to the "identifier" std::string of that immutable.
1376- ret.immutableReferences [item.data ()].first = m_immutables.at (item.data ());
1377- // Record the bytecode offset of the PUSH32 argument.
1378- ret.immutableReferences [item.data ()].second .emplace_back (ret.bytecode .size ());
1379- // Advance bytecode by 32 bytes (default initialized).
1380- ret.bytecode .resize (ret.bytecode .size () + 32 );
1381- });
1391+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::PUSH32));
1392+ // Maps keccak back to the "identifier" std::string of that immutable.
1393+ ret.immutableReferences [item.data ()].first = m_immutables.at (item.data ());
1394+ // Record the bytecode offset of the PUSH32 argument.
1395+ ret.immutableReferences [item.data ()].second .emplace_back (ret.bytecode .size ());
1396+ // Advance bytecode by 32 bytes (default initialized).
1397+ ret.bytecode .resize (ret.bytecode .size () + 32 );
13821398 break ;
13831399 case VerbatimBytecode:
13841400 ret.bytecode += assembleVerbatimBytecode (item);
@@ -1391,53 +1407,41 @@ LinkerObject const& Assembly::assembleLegacy() const
13911407 {
13921408 if (i != offsets.size () - 1 )
13931409 {
1394- assembleInstruction ([&]() {
1395- ret.bytecode .push_back (uint8_t (Instruction::DUP2));
1396- });
1397- assembleInstruction ([&]() {
1398- ret.bytecode .push_back (uint8_t (Instruction::DUP2));
1399- });
1410+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::DUP2));
1411+ // This item type decomposes into multiple evm instructions, so we manually call emit()
1412+ instructionLocationEmitter.emit ();
1413+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::DUP2));
1414+ instructionLocationEmitter.emit ();
14001415 }
1401- assembleInstruction ([&]() {
1402- // TODO: should we make use of the constant optimizer methods for pushing the offsets?
1403- bytes offsetBytes = toCompactBigEndian (u256 (offsets[i]));
1404- ret.bytecode .push_back (static_cast <uint8_t >(pushInstruction (static_cast <unsigned >(offsetBytes.size ()))));
1405- ret.bytecode += offsetBytes;
1406- });
1407- assembleInstruction ([&]() {
1408- ret.bytecode .push_back (uint8_t (Instruction::ADD));
1409- });
1410- assembleInstruction ([&]() {
1411- ret.bytecode .push_back (uint8_t (Instruction::MSTORE));
1412- });
1416+ // TODO: should we make use of the constant optimizer methods for pushing the offsets?
1417+ bytes offsetBytes = toCompactBigEndian (u256 (offsets[i]));
1418+ ret.bytecode .push_back (static_cast <uint8_t >(pushInstruction (static_cast <unsigned >(offsetBytes.size ()))));
1419+ ret.bytecode += offsetBytes;
1420+ instructionLocationEmitter.emit ();
1421+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::ADD));
1422+ instructionLocationEmitter.emit ();
1423+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::MSTORE));
1424+ // No emit needed here, it's taken care of by the destructor of instructionLocationEmitter.
14131425 }
14141426 if (offsets.empty ())
14151427 {
1416- assembleInstruction ([&]() {
1417- ret.bytecode .push_back (uint8_t (Instruction::POP));
1418- });
1419- assembleInstruction ([&]() {
1420- ret.bytecode .push_back (uint8_t (Instruction::POP));
1421- });
1428+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::POP));
1429+ instructionLocationEmitter.emit ();
1430+ ret.bytecode .push_back (static_cast <uint8_t >(Instruction::POP));
1431+ // no emit needed here, it's taken care of by the destructor of instructionLocationEmitter
14221432 }
14231433 immutableReferencesBySub.erase (item.data ());
14241434 break ;
14251435 }
14261436 case PushDeployTimeAddress:
1427- assembleInstruction ([&]() {
1428- ret.bytecode += assemblePushDeployTimeAddress ();
1429- });
1437+ ret.bytecode += assemblePushDeployTimeAddress ();
14301438 break ;
14311439 case Tag:
1432- assembleInstruction ([&](){
1433- ret.bytecode += assembleTag (item, ret.bytecode .size (), true );
1434- });
1440+ ret.bytecode += assembleTag (item, ret.bytecode .size (), true );
14351441 break ;
14361442 default :
14371443 solAssert (false , " Unexpected opcode while assembling." );
14381444 }
1439-
1440- ++assemblyItemIndex;
14411445 }
14421446
14431447 codeSectionLocation.end = ret.bytecode .size ();
@@ -1606,9 +1610,17 @@ LinkerObject const& Assembly::assembleEOF() const
16061610 for (auto && [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate)
16071611 {
16081612 auto const sectionStart = ret.bytecode .size ();
1613+
1614+ std::vector<LinkerObject::InstructionLocation> instructionLocations;
1615+ instructionLocations.reserve (codeSection.items .size ());
1616+
16091617 solAssert (!codeSection.items .empty (), " Empty code section." );
1610- for (AssemblyItem const & item: codeSection.items )
1618+
1619+ for (auto const & [assemblyItemIndex, item]: codeSection.items | ranges::views::enumerate)
16111620 {
1621+ // collect instruction locations via side effects
1622+ InstructionLocationEmitter instructionLocationEmitter {instructionLocations, ret.bytecode , assemblyItemIndex};
1623+
16121624 // store position of the invalid jump destination
16131625 if (item.type () != Tag && m_tagPositionsInBytecode[0 ] == std::numeric_limits<size_t >::max ())
16141626 m_tagPositionsInBytecode[0 ] = ret.bytecode .size ();
@@ -1724,6 +1736,12 @@ LinkerObject const& Assembly::assembleEOF() const
17241736 " Code section too large for EOF."
17251737 );
17261738 setBigEndianUint16 (ret.bytecode , codeSectionSizePositions[codeSectionIndex], ret.bytecode .size () - sectionStart);
1739+
1740+ ret.codeSectionLocations .push_back (LinkerObject::CodeSectionLocation{
1741+ .start = sectionStart,
1742+ .end = ret.bytecode .size (),
1743+ .instructionLocations = std::move (instructionLocations)
1744+ });
17271745 }
17281746
17291747 for (auto const & [refPos, tagId]: tagRef)
0 commit comments