Skip to content
This repository was archived by the owner on May 23, 2023. It is now read-only.

Commit ee777d9

Browse files
committed
Better parsing for contract name.
1. The solc_wrapper interface returns the bytecode of the last contract by default (since a single Solidity file can contain multiple contracts). To do so, we need to parse the source file and get the contract/library names in order of appearance. To handle this scenario correctly, we need to ignore the contract/library keywords that appear in comments and also account for the possibility of bad formatting. 2. The contract code can make use of libraries; since library addresses are not known when compiling, the returned bytecode might have unresolved symbols. To help with the task of managing these symbols, a few functions were added. We also try to decode the bytecode from hexadecimal by default, but if there is an unresolved symbol the decoding fails and we just fall back to the original hexadecimal representation (this might be a usage trap, but it is required for backwards compatibility).
1 parent e4994a7 commit ee777d9

File tree

1 file changed

+104
-6
lines changed

1 file changed

+104
-6
lines changed

ethereum/_solidity.py

Lines changed: 104 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,12 @@ def solc_parse_output(compiler_output):
6666
if 'bin' in result.values()[0]:
6767
for value in result.values():
6868
value['bin_hex'] = value['bin']
69-
value['bin'] = value['bin_hex'].decode('hex')
69+
70+
# decoding can fail if the compiled contract has unresolved symbols
71+
try:
72+
value['bin'] = value['bin_hex'].decode('hex')
73+
except TypeError:
74+
pass
7075

7176
for json_data in ('abi', 'devdoc', 'userdoc'):
7277
# the values in the output can be configured through the
@@ -90,10 +95,104 @@ def compiler_version():
9095
return match.group(1)
9196

9297

93-
def solidity_names(code):  # pylint: disable=too-many-branches
    """ Return the library and contract names in order of appearance.

    The source is scanned token by token instead of with a single regular
    expression, so that the corner cases are handled:

    - `contract`/`library` inside a comment or a string literal is ignored
    - comments may span multiple lines
    - the keywords do not need to be at the start of a line
    - the keywords are only recognised as whole words, `mycontract Foo` does
      not declare anything

    Args:
        code (str): The solidity source code.

    Returns:
        list: A list of `(kind, name)` tuples, where `kind` is either
        `'contract'` or `'library'`, in the order the declarations appear.
    """
    identifier = re.compile(r'[_$a-zA-Z][_$a-zA-Z0-9]*')

    names = []
    pending_kind = None  # keyword seen, waiting for the declared name
    end = len(code)
    pos = 0

    while pos < end:
        char = code[pos]

        if char.isspace():
            pos += 1

        elif code.startswith('//', pos):
            # single line comment, runs until the end of the line
            while pos < end and code[pos] not in '\r\n':
                pos += 1

        elif code.startswith('/*', pos):
            # search after the opening marker so that `/*/` is not taken as a
            # complete comment, an unterminated comment swallows the rest
            close = code.find('*/', pos + 2)
            pos = end if close == -1 else close + 2

        elif char in ('"', "'"):
            pending_kind = None
            pos += 1

            while pos < end and code[pos] != char:
                # a backslash escapes the next character, whatever it is, so
                # an escaped backslash cannot hide the closing quote
                pos += 2 if code[pos] == '\\' else 1

            pos += 1  # step over the closing quote

        else:
            match = identifier.match(code, pos)

            if match is None:
                # punctuation, digits, etc. cannot sit between the keyword
                # and the name
                pending_kind = None
                pos += 1
                continue

            word = match.group()
            pos = match.end()

            if pending_kind is not None:
                names.append((pending_kind, word))
                pending_kind = None
            elif word in ('contract', 'library'):
                pending_kind = word

    return names
151+
152+
153+
def solidity_library_symbol(library_name):
    """ Return the symbol used in the bytecode to represent the `library_name`.

    The symbol is always 40 characters in length: two leading underscores,
    the library name (truncated to 36 characters) and underscore padding on
    the right, which guarantees a minimum of two trailing underscores.

    Args:
        library_name (str): The name of the library.

    Returns:
        str: The 40 characters placeholder for the library.
    """
    # 36 = 40 - 2 leading underscores - 2 trailing underscores
    truncated_name = library_name[:36]

    # pad the name up to 38 characters, so that with the two leading
    # underscores the symbol has exactly 40 characters
    return '__' + truncated_name.ljust(38, '_')
161+
162+
163+
def solidity_resolve_address(hex_code, library_name, library_address):
    """ Replace the references to a library with its address.

    Every occurrence of the symbol computed by `solidity_library_symbol` for
    `library_name` is substituted by `library_address`.

    Args:
        hex_code (bin): The bytecode encoded in hexadecimal.
        library_name (str): The library that will be resolved.
        library_address (str): The address of the library.

    Returns:
        bin: The bytecode encoded in hexadecimal with the library references
            resolved.
    """
    placeholder = solidity_library_symbol(library_name)
    resolved_code = hex_code.replace(placeholder, library_address)
    return resolved_code
177+
178+
179+
def solidity_unresolved_symbols(hex_code):
    """ Return the unresolved symbols contained in the `hex_code`.

    An underscore is not a valid hexadecimal digit, so each underscore found
    while scanning marks the start of a 40 characters long symbol. The scan
    resumes after the end of each symbol.

    Args:
        hex_code (str): The bytecode encoded in hexadecimal.

    Returns:
        set: The distinct 40 characters long symbols found. An underscore
        followed by less than 39 characters (truncated input) is ignored
        instead of raising `StopIteration`.
    """
    # DOTALL so that any character counts towards the 40, same as a plain
    # character by character scan
    return set(re.findall(r'_.{39}', hex_code, re.DOTALL))
97196

98197

99198
def compile_file(filepath, libraries=None, combined='bin,abi', optimize=True):
@@ -138,8 +237,7 @@ def compile_last_contract(filepath, libraries=None, combined='bin,abi', optimize
138237

139238
all_contract_names = [
140239
name
141-
for kind, name in all_names
142-
# if kind == 'contract'
240+
for _, name in all_names
143241
]
144242

145243
last_contract = all_contract_names[-1]

0 commit comments

Comments
 (0)