Skip to content

Commit 459ad24

Browse files
authored
Merge pull request #15955 from ethereum/fix_asm_subpath_obj_id
Fix inconsistent negative subassembly indices between different sizeof(size_t)
2 parents f7058d8 + f6b95db commit 459ad24

35 files changed

+432
-129
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Compiler Features:
66
* ethdebug: Experimental support for instructions and source locations under EOF.
77

88
Bugfixes:
9+
* Assembler: Fix not using a fixed-width type for IDs being assigned to subassemblies nested more than one level away, resulting in inconsistent `--asm-json` output between target architectures.
910

1011
### 0.8.30 (2025-05-07)
1112

libevmasm/Assembly.cpp

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <fmt/format.h>
4040

4141
#include <range/v3/algorithm/any_of.hpp>
42+
#include <range/v3/algorithm/find_if.hpp>
4243
#include <range/v3/view/drop_exactly.hpp>
4344
#include <range/v3/view/enumerate.hpp>
4445
#include <range/v3/view/map.hpp>
@@ -715,17 +716,17 @@ std::pair<std::shared_ptr<Assembly>, std::vector<std::string>> Assembly::fromJSO
715716
return std::make_pair(result, _level == 0 ? parsedSourceList : std::vector<std::string>{});
716717
}
717718

718-
void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
719+
void Assembly::encodeAllPossibleSubPathsInAssemblyTree(std::vector<SubAssemblyID> _pathFromRoot, std::vector<Assembly*> _assembliesOnPath)
719720
{
720721
_assembliesOnPath.push_back(this);
721-
for (_pathFromRoot.push_back(0); _pathFromRoot.back() < m_subs.size(); ++_pathFromRoot.back())
722+
for (_pathFromRoot.push_back(SubAssemblyID{0}); _pathFromRoot.back().value < m_subs.size(); ++_pathFromRoot.back().value)
722723
{
723724
for (size_t distanceFromRoot = 0; distanceFromRoot < _assembliesOnPath.size(); ++distanceFromRoot)
724725
_assembliesOnPath[distanceFromRoot]->encodeSubPath(
725726
_pathFromRoot | ranges::views::drop_exactly(distanceFromRoot) | ranges::to<std::vector>
726727
);
727728

728-
m_subs[_pathFromRoot.back()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
729+
m_subs[_pathFromRoot.back().asIndex()]->encodeAllPossibleSubPathsInAssemblyTree(_pathFromRoot, _assembliesOnPath);
729730
}
730731
}
731732

@@ -846,20 +847,20 @@ std::map<u256, u256> const& Assembly::optimiseInternal(
846847

847848
// Run optimisation for sub-assemblies.
848849
// TODO: verify and double-check this for EOF.
849-
for (size_t subId = 0; subId < m_subs.size(); ++subId)
850+
for (SubAssemblyID subID{0}; subID.value < m_subs.size(); ++subID.value)
850851
{
851852
OptimiserSettings settings = _settings;
852-
Assembly& sub = *m_subs[subId];
853+
Assembly& sub = *m_subs[subID.asIndex()];
853854
std::set<size_t> referencedTags;
854855
for (auto& codeSection: m_codeSections)
855-
referencedTags += JumpdestRemover::referencedTags(codeSection.items, subId);
856+
referencedTags += JumpdestRemover::referencedTags(codeSection.items, subID);
856857
std::map<u256, u256> const& subTagReplacements = sub.optimiseInternal(
857858
settings,
858859
referencedTags
859860
);
860861
// Apply the replacements (can be empty).
861862
for (auto& codeSection: m_codeSections)
862-
BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subId);
863+
BlockDeduplicator::applyTagReplacement(codeSection.items, subTagReplacements, subID);
863864
}
864865

865866
std::map<u256, u256> tagReplacements;
@@ -1235,7 +1236,7 @@ LinkerObject const& Assembly::assemble() const
12351236
[[nodiscard]] bytes Assembly::assembleTag(AssemblyItem const& _item, size_t _pos, bool _addJumpDest) const
12361237
{
12371238
solRequire(_item.data() != 0, AssemblyException, "Invalid tag position.");
1238-
solRequire(_item.splitForeignPushTag().first == std::numeric_limits<size_t>::max(), AssemblyException, "Foreign tag.");
1239+
solRequire(_item.splitForeignPushTag().first.empty(), AssemblyException, "Foreign tag.");
12391240
solRequire(_pos < 0xffffffffL, AssemblyException, "Tag too large.");
12401241
size_t tagId = static_cast<size_t>(_item.data());
12411242
solRequire(m_tagPositionsInBytecode[tagId] == std::numeric_limits<size_t>::max(), AssemblyException, "Duplicate tag position.");
@@ -1306,10 +1307,10 @@ LinkerObject const& Assembly::assembleLegacy() const
13061307
if (item.type() == PushTag)
13071308
{
13081309
auto [subId, tagId] = item.splitForeignPushTag();
1309-
if (subId == std::numeric_limits<size_t>::max())
1310+
if (subId.empty())
13101311
continue;
1311-
assertThrow(subId < m_subs.size(), AssemblyException, "Invalid sub id");
1312-
auto subTagPosition = m_subs[subId]->m_tagPositionsInBytecode.at(tagId);
1312+
solAssert(subId.value < m_subs.size(), "Invalid sub id");
1313+
auto subTagPosition = m_subs[subId.asIndex()]->m_tagPositionsInBytecode.at(tagId);
13131314
assertThrow(subTagPosition != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
13141315
bytesPerTag = std::max(bytesPerTag, numberEncodingSize(subTagPosition));
13151316
}
@@ -1358,15 +1359,15 @@ LinkerObject const& Assembly::assembleLegacy() const
13581359
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13591360
break;
13601361
case PushSub:
1361-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1362+
solAssert(item.data() <= std::numeric_limits<SubAssemblyID::ValueType>::max());
13621363
ret.bytecode.push_back(dataRefPush);
1363-
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1364+
subRefs.emplace(SubAssemblyID{item.data()}, ret.bytecode.size());
13641365
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
13651366
break;
13661367
case PushSubSize:
13671368
{
1368-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1369-
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1369+
solAssert(item.data() <= std::numeric_limits<SubAssemblyID::ValueType>::max());
1370+
auto s = subAssemblyById(SubAssemblyID{item.data()})->assemble().bytecode.size();
13701371
item.setPushedValue(u256(s));
13711372
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
13721373
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
@@ -1481,14 +1482,12 @@ LinkerObject const& Assembly::assembleLegacy() const
14811482
}
14821483
for (auto const& i: tagRefs)
14831484
{
1484-
size_t subId;
1485-
size_t tagId;
1486-
std::tie(subId, tagId) = i.second;
1487-
assertThrow(subId == std::numeric_limits<size_t>::max() || subId < m_subs.size(), AssemblyException, "Invalid sub id");
1485+
auto [subId, tagId] = i.second;
1486+
solAssert(subId.empty() || subId.value < m_subs.size(), "Invalid sub id");
14881487
std::vector<size_t> const& tagPositions =
1489-
subId == std::numeric_limits<size_t>::max() ?
1488+
subId.empty() ?
14901489
m_tagPositionsInBytecode :
1491-
m_subs[subId]->m_tagPositionsInBytecode;
1490+
m_subs[subId.asIndex()]->m_tagPositionsInBytecode;
14921491
assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag.");
14931492
size_t pos = tagPositions[tagId];
14941493
assertThrow(pos != std::numeric_limits<size_t>::max(), AssemblyException, "Reference to tag without position.");
@@ -1813,47 +1812,46 @@ LinkerObject const& Assembly::assembleEOF() const
18131812
return ret;
18141813
}
18151814

1816-
std::vector<size_t> Assembly::decodeSubPath(size_t _subObjectId) const
1815+
std::vector<SubAssemblyID> Assembly::decodeSubPath(SubAssemblyID _subObjectId) const
18171816
{
1818-
if (_subObjectId < m_subs.size())
1817+
if (_subObjectId.value < m_subs.size())
18191818
return {_subObjectId};
18201819

1821-
auto subIdPathIt = find_if(
1822-
m_subPaths.begin(),
1823-
m_subPaths.end(),
1820+
auto subIdPathIt = ranges::find_if(
1821+
m_subPaths,
18241822
[_subObjectId](auto const& subId) { return subId.second == _subObjectId; }
18251823
);
18261824

18271825
assertThrow(subIdPathIt != m_subPaths.end(), AssemblyException, "");
18281826
return subIdPathIt->first;
18291827
}
18301828

1831-
size_t Assembly::encodeSubPath(std::vector<size_t> const& _subPath)
1829+
SubAssemblyID Assembly::encodeSubPath(std::vector<SubAssemblyID> const& _subPath)
18321830
{
18331831
assertThrow(!_subPath.empty(), AssemblyException, "");
18341832
if (_subPath.size() == 1)
18351833
{
1836-
assertThrow(_subPath[0] < m_subs.size(), AssemblyException, "");
1834+
solAssert(_subPath[0].value < m_subs.size());
18371835
return _subPath[0];
18381836
}
18391837

1840-
if (m_subPaths.find(_subPath) == m_subPaths.end())
1838+
if (!m_subPaths.contains(_subPath))
18411839
{
1842-
size_t objectId = std::numeric_limits<size_t>::max() - m_subPaths.size();
1843-
assertThrow(objectId >= m_subs.size(), AssemblyException, "");
1840+
SubAssemblyID const objectId{std::numeric_limits<SubAssemblyID::ValueType>::max() - m_subPaths.size()};
1841+
solAssert(objectId.value >= m_subs.size());
18441842
m_subPaths[_subPath] = objectId;
18451843
}
18461844

18471845
return m_subPaths[_subPath];
18481846
}
18491847

1850-
Assembly const* Assembly::subAssemblyById(size_t _subId) const
1848+
Assembly const* Assembly::subAssemblyById(SubAssemblyID const _subId) const
18511849
{
1852-
std::vector<size_t> subIds = decodeSubPath(_subId);
1850+
std::vector<SubAssemblyID> subIDs = decodeSubPath(_subId);
18531851
Assembly const* currentAssembly = this;
1854-
for (size_t currentSubId: subIds)
1852+
for (auto const& subID: subIDs)
18551853
{
1856-
currentAssembly = currentAssembly->m_subs.at(currentSubId).get();
1854+
currentAssembly = currentAssembly->m_subs.at(subID.asIndex()).get();
18571855
assertThrow(currentAssembly, AssemblyException, "");
18581856
}
18591857

libevmasm/Assembly.h

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <libevmasm/AssemblyItem.h>
2424
#include <libevmasm/LinkerObject.h>
2525
#include <libevmasm/Exceptions.h>
26+
#include <libevmasm/SubAssemblyID.h>
2627

2728
#include <liblangutil/DebugInfoSelection.h>
2829
#include <liblangutil/EVMVersion.h>
@@ -47,9 +48,9 @@ using AssemblyPointer = std::shared_ptr<Assembly>;
4748

4849
class Assembly
4950
{
50-
using TagRefs = std::map<size_t, std::pair<size_t, size_t>>;
51+
using TagRefs = std::map<size_t, std::pair<SubAssemblyID, size_t>>;
5152
using DataRefs = std::multimap<util::h256, unsigned>;
52-
using SubAssemblyRefs = std::multimap<size_t, size_t>;
53+
using SubAssemblyRefs = std::multimap<SubAssemblyID, size_t>;
5354
using ProgramSizeRefs = std::vector<unsigned>;
5455
using LinkRef = std::pair<size_t, std::string>;
5556

@@ -81,8 +82,8 @@ class Assembly
8182
AssemblyItem newData(bytes const& _data) { util::h256 h(util::keccak256(util::asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); }
8283
bytes const& data(util::h256 const& _i) const { return m_data.at(_i); }
8384
AssemblyItem newSub(AssemblyPointer const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); }
84-
Assembly const& sub(size_t _sub) const { return *m_subs.at(_sub); }
85-
Assembly& sub(size_t _sub) { return *m_subs.at(_sub); }
85+
Assembly const& sub(SubAssemblyID const _sub) const { return *m_subs.at(_sub.asIndex()); }
86+
Assembly& sub(SubAssemblyID const _sub) { return *m_subs.at(_sub.asIndex()); }
8687
size_t numSubs() const { return m_subs.size(); }
8788
AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); }
8889
AssemblyItem newPushLibraryAddress(std::string const& _identifier);
@@ -142,9 +143,9 @@ class Assembly
142143
/// Adds a subroutine to the code (in the data section) and pushes its size (via a tag)
143144
/// on the stack. @returns the pushsub assembly item.
144145
AssemblyItem appendSubroutine(AssemblyPointer const& _assembly) { auto sub = newSub(_assembly); append(newPushSubSize(size_t(sub.data()))); return sub; }
145-
void pushSubroutineSize(size_t _subRoutine) { append(newPushSubSize(_subRoutine)); }
146+
void pushSubroutineSize(SubAssemblyID _subRoutine) { append(newPushSubSize(_subRoutine.value)); }
146147
/// Pushes the offset of the subroutine.
147-
void pushSubroutineOffset(size_t _subRoutine) { append(AssemblyItem(PushSub, _subRoutine)); }
148+
void pushSubroutineOffset(SubAssemblyID _subRoutine) { append(AssemblyItem(PushSub, _subRoutine.value)); }
148149

149150
/// Appends @a _data literally to the very end of the bytecode.
150151
void appendToAuxiliaryData(bytes const& _data) { m_auxiliaryData += _data; }
@@ -216,8 +217,8 @@ class Assembly
216217
/// Mark this assembly as invalid. Calling ``assemble`` on it will throw.
217218
void markAsInvalid() { m_invalid = true; }
218219

219-
std::vector<size_t> decodeSubPath(size_t _subObjectId) const;
220-
size_t encodeSubPath(std::vector<size_t> const& _subPath);
220+
std::vector<SubAssemblyID> decodeSubPath(SubAssemblyID _subObjectId) const;
221+
SubAssemblyID encodeSubPath(std::vector<SubAssemblyID> const& _subPath);
221222

222223
bool isCreation() const { return m_creation; }
223224

@@ -265,9 +266,9 @@ class Assembly
265266
private:
266267
bool m_invalid = false;
267268

268-
Assembly const* subAssemblyById(size_t _subId) const;
269+
Assembly const* subAssemblyById(SubAssemblyID _subId) const;
269270

270-
void encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});
271+
void encodeAllPossibleSubPathsInAssemblyTree(std::vector<SubAssemblyID> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});
271272

272273
std::shared_ptr<std::string const> sharedSourceName(std::string const& _name) const;
273274

@@ -315,7 +316,10 @@ class Assembly
315316

316317
/// Map from a vector representing a path to a particular sub assembly to sub assembly id.
317318
/// This map is used only for sub-assemblies which are not direct sub-assemblies (where path is having more than one value).
318-
std::map<std::vector<size_t>, size_t> m_subPaths;
319+
/// Nested/indirect SubAssemblyIDs are assigned negative unsigned 64 bit integers, i.e., the `i`th indirect
320+
/// index corresponds to `std::numeric_limits<std::uint64_t>::max() - i` in contrast to direct indices which occupy
321+
/// the non-negative half of the index space.
322+
std::map<std::vector<SubAssemblyID>, SubAssemblyID> m_subPaths;
319323

320324
/// Contains the tag replacements relevant for super-assemblies.
321325
/// If set, it means the optimizer has run and we will not run it again.

libevmasm/AssemblyItem.cpp

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ std::string toStringInHex(u256 _value)
5050

5151
}
5252

53-
AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
53+
AssemblyItem AssemblyItem::toSubAssemblyTag(SubAssemblyID _subId) const
5454
{
5555
assertThrow(data() < (u256(1) << 64), util::Exception, "Tag already has subassembly set.");
5656
assertThrow(m_type == PushTag || m_type == Tag, util::Exception, "");
@@ -61,20 +61,21 @@ AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
6161
return r;
6262
}
6363

64-
std::pair<size_t, size_t> AssemblyItem::splitForeignPushTag() const
64+
std::pair<SubAssemblyID, size_t> AssemblyItem::splitForeignPushTag() const
6565
{
6666
solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump);
6767
u256 combined = u256(data());
68-
size_t subId = static_cast<size_t>((combined >> 64) - 1);
68+
// the combined u256 is 'dirty', so we can't use the conversion constructor of SubAssemblyID here
69+
SubAssemblyID const subID{static_cast<SubAssemblyID::ValueType>((combined >> 64) - 1)};
6970
size_t tag = static_cast<size_t>(combined & 0xffffffffffffffffULL);
70-
return std::make_pair(subId, tag);
71+
return std::make_pair(subID, tag);
7172
}
7273

7374
size_t AssemblyItem::relativeJumpTagID() const
7475
{
7576
solAssert(m_type == RelativeJump || m_type == ConditionalRelativeJump);
7677
auto const [subId, tagId] = splitForeignPushTag();
77-
solAssert(subId == std::numeric_limits<size_t>::max(), "Relative jump to sub");
78+
solAssert(subId.empty(), "Relative jump to sub");
7879
return tagId;
7980
}
8081

@@ -130,13 +131,13 @@ std::pair<std::string, std::string> AssemblyItem::nameAndData(langutil::EVMVersi
130131
util::unreachable();
131132
}
132133

133-
void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag)
134+
void AssemblyItem::setPushTagSubIdAndTag(SubAssemblyID _subId, size_t _tag)
134135
{
135136
solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump);
136-
solAssert(!(m_type == RelativeJump || m_type == ConditionalRelativeJump) || _subId == std::numeric_limits<size_t>::max());
137+
solAssert(!(m_type == RelativeJump || m_type == ConditionalRelativeJump) || _subId.empty());
137138
u256 data = _tag;
138-
if (_subId != std::numeric_limits<size_t>::max())
139-
data |= (u256(_subId) + 1) << 64;
139+
if (!_subId.empty())
140+
data |= (u256(_subId.value) + 1) << 64;
140141
setData(data);
141142
}
142143

@@ -352,13 +353,11 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
352353
break;
353354
case PushTag:
354355
{
355-
size_t sub{0};
356-
size_t tag{0};
357-
std::tie(sub, tag) = splitForeignPushTag();
358-
if (sub == std::numeric_limits<size_t>::max())
356+
auto [sub, tag] = splitForeignPushTag();
357+
if (sub.empty())
359358
text = std::string("tag_") + std::to_string(tag);
360359
else
361-
text = std::string("tag_") + std::to_string(sub) + "_" + std::to_string(tag);
360+
text = std::string("tag_") + std::to_string(sub.value) + "_" + std::to_string(tag);
362361
break;
363362
}
364363
case Tag:
@@ -372,8 +371,8 @@ std::string AssemblyItem::toAssemblyText(Assembly const& _assembly) const
372371
case PushSubSize:
373372
{
374373
std::vector<std::string> subPathComponents;
375-
for (size_t subPathComponentId: _assembly.decodeSubPath(static_cast<size_t>(data())))
376-
subPathComponents.emplace_back("sub_" + std::to_string(subPathComponentId));
374+
for (SubAssemblyID subPathComponentId: _assembly.decodeSubPath(SubAssemblyID{data()}))
375+
subPathComponents.emplace_back("sub_" + std::to_string(subPathComponentId.value));
377376
text =
378377
(type() == PushSub ? "dataOffset"s : "dataSize"s) +
379378
"(" +
@@ -469,11 +468,11 @@ std::ostream& solidity::evmasm::operator<<(std::ostream& _out, AssemblyItem cons
469468
break;
470469
case PushTag:
471470
{
472-
size_t subId = _item.splitForeignPushTag().first;
473-
if (subId == std::numeric_limits<size_t>::max())
471+
SubAssemblyID subId = _item.splitForeignPushTag().first;
472+
if (subId.empty())
474473
_out << " PushTag " << _item.splitForeignPushTag().second;
475474
else
476-
_out << " PushTag " << subId << ":" << _item.splitForeignPushTag().second;
475+
_out << " PushTag " << subId.value << ":" << _item.splitForeignPushTag().second;
477476
break;
478477
}
479478
case Tag:

libevmasm/AssemblyItem.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include <libevmasm/Instruction.h>
2626
#include <libevmasm/Exceptions.h>
27+
#include <libevmasm/SubAssemblyID.h>
2728
#include <liblangutil/DebugData.h>
2829
#include <liblangutil/Exceptions.h>
2930
#include <libsolutil/Common.h>
@@ -171,14 +172,14 @@ class AssemblyItem
171172
AssemblyItem pushTag() const { solAssert(m_type == PushTag || m_type == Tag || m_type == RelativeJump || m_type == ConditionalRelativeJump); return AssemblyItem(PushTag, data()); }
172173
/// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies.
173174
/// @param _subId the identifier of the subassembly the tag is taken from.
174-
AssemblyItem toSubAssemblyTag(size_t _subId) const;
175+
AssemblyItem toSubAssemblyTag(SubAssemblyID _subId) const;
175176
/// @returns splits the data of the push tag into sub assembly id and actual tag id.
176177
/// The sub assembly id of non-foreign push tags is -1.
177-
std::pair<size_t, size_t> splitForeignPushTag() const;
178+
std::pair<SubAssemblyID, size_t> splitForeignPushTag() const;
178179
/// @returns relative jump target tag ID. Asserts that it is not foreign tag.
179180
size_t relativeJumpTagID() const;
180181
/// Sets sub-assembly part and tag for a push tag.
181-
void setPushTagSubIdAndTag(size_t _subId, size_t _tag);
182+
void setPushTagSubIdAndTag(SubAssemblyID _subId, size_t _tag);
182183

183184
AssemblyItemType type() const { return m_type; }
184185
u256 const& data() const { solAssert(m_type != Operation && m_data != nullptr); return *m_data; }

0 commit comments

Comments
 (0)