Skip to content

libevmasm: refactor asm-json export & add support for source list. #12799

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ Language Features:

Compiler Features:
* Peephole Optimizer: Remove operations without side effects before simple terminations.
* Assembly-Json: Export: Include source list in `sourceList` field.


Bugfixes:
* Assembly-Json: Fix assembly json export to store jump types of operations in `jumpType` field instead of `value`.



Expand Down
150 changes: 43 additions & 107 deletions libevmasm/Assembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,141 +222,77 @@ string Assembly::assemblyString(
return tmp.str();
}

/// Builds the JSON object that describes one assembly item.
/// The keys "name", "source", "begin" and "end" are always written;
/// "value" and "jumpType" are written only when the matching argument is non-empty.
Json::Value Assembly::createJsonValue(string _name, int _source, int _begin, int _end, string _value, string _jumpType)
{
	Json::Value entry{Json::objectValue};

	// Optional fields are left out entirely instead of being stored as empty strings.
	auto const storeUnlessEmpty = [&entry](char const* _key, string const& _text)
	{
		if (_text.empty())
			return;
		entry[_key] = _text;
	};

	entry["name"] = _name;
	entry["source"] = _source;
	entry["begin"] = _begin;
	entry["end"] = _end;
	storeUnlessEmpty("value", _value);
	storeUnlessEmpty("jumpType", _jumpType);

	return entry;
}

/// @returns _value streamed in hexadecimal notation using upper-case digits.
string Assembly::toStringInHex(u256 _value)
{
	std::ostringstream formatted;
	formatted << std::hex << std::uppercase << _value;
	return formatted.str();
}

Json::Value Assembly::assemblyJSON(map<string, unsigned> const& _sourceIndices) const
Json::Value Assembly::assemblyJSON(map<string, unsigned> const& _sourceIndices, bool _includeSourceList) const
{
Json::Value root;
root[".code"] = Json::arrayValue;

Json::Value& collection = root[".code"];
for (AssemblyItem const& i: m_items)
Json::Value& code = root[".code"];
for (AssemblyItem const& item: m_items)
{
int sourceIndex = -1;
if (i.location().sourceName)
if (item.location().sourceName)
{
auto iter = _sourceIndices.find(*i.location().sourceName);
auto iter = _sourceIndices.find(*item.location().sourceName);
if (iter != _sourceIndices.end())
sourceIndex = static_cast<int>(iter->second);
}

switch (i.type())
auto [name, data] = item.nameAndData();
Json::Value jsonItem;
jsonItem["name"] = name;
jsonItem["begin"] = item.location().start;
jsonItem["end"] = item.location().end;
if (item.m_modifierDepth != 0)
jsonItem["modifierDepth"] = static_cast<int>(item.m_modifierDepth);
std::string jumpType = item.getJumpTypeAsString();
if (!jumpType.empty())
jsonItem["jumpType"] = jumpType;
if (name == "PUSHLIB")
data = m_libraries.at(h256(data));
else if (name == "PUSHIMMUTABLE" || name == "ASSIGNIMMUTABLE")
data = m_immutables.at(h256(data));
if (!data.empty())
jsonItem["value"] = data;
jsonItem["source"] = sourceIndex;
code.append(move(jsonItem));

if (item.type() == AssemblyItemType::Tag)
{
case Operation:
collection.append(
createJsonValue(
instructionInfo(i.instruction()).name,
sourceIndex,
i.location().start,
i.location().end,
i.getJumpTypeAsString())
);
break;
case Push:
collection.append(
createJsonValue("PUSH", sourceIndex, i.location().start, i.location().end, toStringInHex(i.data()), i.getJumpTypeAsString()));
break;
case PushTag:
if (i.data() == 0)
collection.append(
createJsonValue("PUSH [ErrorTag]", sourceIndex, i.location().start, i.location().end, ""));
else
collection.append(
createJsonValue("PUSH [tag]", sourceIndex, i.location().start, i.location().end, toString(i.data())));
break;
case PushSub:
collection.append(
createJsonValue("PUSH [$]", sourceIndex, i.location().start, i.location().end, toString(h256(i.data()))));
break;
case PushSubSize:
collection.append(
createJsonValue("PUSH #[$]", sourceIndex, i.location().start, i.location().end, toString(h256(i.data()))));
break;
case PushProgramSize:
collection.append(
createJsonValue("PUSHSIZE", sourceIndex, i.location().start, i.location().end));
break;
case PushLibraryAddress:
collection.append(
createJsonValue("PUSHLIB", sourceIndex, i.location().start, i.location().end, m_libraries.at(h256(i.data())))
);
break;
case PushDeployTimeAddress:
collection.append(
createJsonValue("PUSHDEPLOYADDRESS", sourceIndex, i.location().start, i.location().end)
);
break;
case PushImmutable:
collection.append(createJsonValue(
"PUSHIMMUTABLE",
sourceIndex,
i.location().start,
i.location().end,
m_immutables.at(h256(i.data()))
));
break;
case AssignImmutable:
collection.append(createJsonValue(
"ASSIGNIMMUTABLE",
sourceIndex,
i.location().start,
i.location().end,
m_immutables.at(h256(i.data()))
));
break;
case Tag:
collection.append(
createJsonValue("tag", sourceIndex, i.location().start, i.location().end, toString(i.data())));
collection.append(
createJsonValue("JUMPDEST", sourceIndex, i.location().start, i.location().end));
break;
case PushData:
collection.append(createJsonValue("PUSH data", sourceIndex, i.location().start, i.location().end, toStringInHex(i.data())));
break;
case VerbatimBytecode:
collection.append(createJsonValue("VERBATIM", sourceIndex, i.location().start, i.location().end, util::toHex(i.verbatimData())));
break;
default:
assertThrow(false, InvalidOpcode, "");
Json::Value jumpdest;
jumpdest["name"] = "JUMPDEST";
jumpdest["begin"] = item.location().start;
jumpdest["end"] = item.location().end;
jumpdest["source"] = sourceIndex;
if (item.m_modifierDepth != 0)
jumpdest["modifierDepth"] = static_cast<int>(item.m_modifierDepth);
code.append(move(jumpdest));
}
}
if (_includeSourceList)
{
root["sourceList"] = Json::arrayValue;
Json::Value& jsonSourceList = root["sourceList"];
for (auto const& [name, index]: _sourceIndices)
jsonSourceList[index] = name;
}

if (!m_data.empty() || !m_subs.empty())
{
root[".data"] = Json::objectValue;
Json::Value& data = root[".data"];
for (auto const& i: m_data)
if (u256(i.first) >= m_subs.size())
data[toStringInHex((u256)i.first)] = util::toHex(i.second);
data[util::toHex(toBigEndian((u256)i.first), util::HexPrefix::DontAdd, util::HexCase::Upper)] = util::toHex(i.second);

for (size_t i = 0; i < m_subs.size(); ++i)
{
std::stringstream hexStr;
hexStr << hex << i;
data[hexStr.str()] = m_subs[i]->assemblyJSON(_sourceIndices);
data[hexStr.str()] = m_subs[i]->assemblyJSON(_sourceIndices, /*_includeSourceList = */false);
}
}

if (m_auxiliaryData.size() > 0)
if (!m_auxiliaryData.empty())
root[".auxdata"] = util::toHex(m_auxiliaryData);

return root;
Expand Down
15 changes: 4 additions & 11 deletions libevmasm/Assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <sstream>
#include <memory>
#include <map>
#include <utility>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. Never knew that C++ had such a catch-all header for basic stuff. I wonder if we should be using it more often instead of including things individually.


namespace solidity::evmasm
{
Expand Down Expand Up @@ -147,7 +148,8 @@ class Assembly

/// Create a JSON representation of the assembly.
Json::Value assemblyJSON(
std::map<std::string, unsigned> const& _sourceIndices = std::map<std::string, unsigned>()
std::map<std::string, unsigned> const& _sourceIndices = std::map<std::string, unsigned>(),
bool _includeSourceList = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this default to false to retain the old behaviour? Do we need a default value?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now I set includeSourceList to false only if a sub-assembly is exported. In CompilerStack::assemblyJSON(..) we want to have the source list included. If called with the default value, the top-level assembly JSON will always include the source list, but its sub-assemblies will not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, this should also be mentioned in the changelog.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true!

) const;

/// Mark this assembly as invalid. Calling ``assemble`` on it will throw.
Expand All @@ -167,16 +169,6 @@ class Assembly
unsigned codeSize(unsigned subTagSize) const;

private:
/// Builds the JSON object for a single assembly item.
/// `_name`, `_source`, `_begin` and `_end` are always stored (under the keys
/// "name", "source", "begin" and "end"); `_value` and `_jumpType` are stored
/// under "value" and "jumpType" only when they are non-empty.
static Json::Value createJsonValue(
std::string _name,
int _source,
int _begin,
int _end,
std::string _value = std::string(),
std::string _jumpType = std::string()
);
/// @returns `_value` formatted as a hexadecimal string with upper-case digits.
static std::string toStringInHex(u256 _value);

bool m_invalid = false;

Assembly const* subAssemblyById(size_t _subId) const;
Expand Down Expand Up @@ -222,6 +214,7 @@ class Assembly
std::string m_name;

langutil::SourceLocation m_currentSourceLocation;

public:
size_t m_currentModifierDepth = 0;
};
Expand Down
51 changes: 51 additions & 0 deletions libevmasm/AssemblyItem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <libevmasm/Assembly.h>

#include <libsolutil/CommonData.h>
#include <libsolutil/CommonIO.h>
#include <libsolutil/Numeric.h>
#include <libsolutil/StringUtils.h>
#include <libsolutil/FixedHash.h>
Expand All @@ -36,6 +37,18 @@ using namespace solidity::langutil;

static_assert(sizeof(size_t) <= 8, "size_t must be at most 64-bits wide");

namespace
{

/// File-local helper.
/// @returns _value written in hexadecimal notation with upper-case digits.
string toStringInHex(u256 _value)
{
	std::ostringstream rendered;
	rendered << std::hex;
	rendered << std::uppercase;
	rendered << _value;
	return rendered.str();
}

}

AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
{
assertThrow(data() < (u256(1) << 64), util::Exception, "Tag already has subassembly set.");
Expand All @@ -56,6 +69,44 @@ pair<size_t, size_t> AssemblyItem::splitForeignPushTag() const
return make_pair(subId, tag);
}

/// Computes the name and the textual payload of this item, one case per item type.
/// @returns a pair of (name, data); data is the empty string for item types
/// that carry no payload. An unhandled item type is reported through
/// assertThrow with InvalidOpcode.
pair<string, string> AssemblyItem::nameAndData() const
{
	// Payload rendered via util::h256. Only invoked from the non-Operation cases below.
	auto const hashedData = [this]() { return toString(util::h256(data())); };

	switch (type())
	{
	case Operation:
	{
		// Operations report a payload only if m_data is set.
		string operand;
		if (m_data != nullptr)
			operand = toStringInHex(*m_data);
		return {instructionInfo(instruction()).name, operand};
	}
	case Push:
		return {"PUSH", toStringInHex(data())};
	case PushTag:
	{
		// A tag value of zero is reported as the error tag, without payload.
		bool const isErrorTag = (data() == 0);
		if (isErrorTag)
			return {"PUSH [ErrorTag]", ""};
		return {"PUSH [tag]", util::toString(data())};
	}
	case PushSub:
		return {"PUSH [$]", hashedData()};
	case PushSubSize:
		return {"PUSH #[$]", hashedData()};
	case PushProgramSize:
		return {"PUSHSIZE", ""};
	case PushLibraryAddress:
		return {"PUSHLIB", hashedData()};
	case PushDeployTimeAddress:
		return {"PUSHDEPLOYADDRESS", ""};
	case PushImmutable:
		return {"PUSHIMMUTABLE", hashedData()};
	case AssignImmutable:
		return {"ASSIGNIMMUTABLE", hashedData()};
	case Tag:
		return {"tag", util::toString(data())};
	case PushData:
		return {"PUSH data", toStringInHex(data())};
	case VerbatimBytecode:
		return {"VERBATIM", util::toHex(verbatimData())};
	default:
		assertThrow(false, InvalidOpcode, "");
	}
}

void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag)
{
assertThrow(m_type == PushTag || m_type == Tag, util::Exception, "");
Expand Down
7 changes: 7 additions & 0 deletions libevmasm/AssemblyItem.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ class AssemblyItem
/// @returns the data stored in this item.
/// Must not be called on items of type Operation (checked with assertThrow).
u256 const& data() const
{
	assertThrow(m_type != Operation, util::Exception, "");
	return *m_data;
}
/// Replaces the data of this item with a freshly allocated copy of _data.
/// Must not be called on items of type Operation (checked with assertThrow).
void setData(u256 const& _data)
{
	assertThrow(m_type != Operation, util::Exception, "");
	m_data = std::make_shared<u256>(_data);
}

/// This function is used in `Assembly::assemblyJSON`.
/// It returns the name and data of the current assembly item.
/// @returns a pair whose first element is the JSON assembly
/// item name and whose second element is the string representation
/// of its data.
std::pair<std::string, std::string> nameAndData() const;

/// @returns the third element of the stored verbatim bytecode tuple.
/// Only valid for items of type VerbatimBytecode (checked with assertThrow).
bytes const& verbatimData() const
{
	assertThrow(m_type == VerbatimBytecode, util::Exception, "");
	return std::get<2>(*m_verbatimBytecode);
}

/// @returns the instruction of this item (only valid if type() == Operation)
Expand Down
Loading