Skip to content

Commit d1e1aae

Browse files
authored
Merge pull request #31 from cormacj/bugfixing
Bugfixing
2 parents 2034902 + 4d9c539 commit d1e1aae

File tree

2 files changed

+60
-32
lines changed

2 files changed

+60
-32
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
2025-03-05: First official release (0.75)
33
2025-03-21: Bump to v0.80 - Add support for external labels, such as BIOS calls. Improved code detection methods.
44
2025-06-13: Bump to v0.85 - Add support for user-defined labels in the labels file. Improved error handling, more informational messages.
5+
2025-06-18: Bump to v0.87 - Corrected issues with string detection where strings weren't being properly decoded.

z80-disassembler.py

Lines changed: 59 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class Pointer(NamedTuple):
9090
str_sizes = {}
9191
style = "asm"
9292
hexstyle = "0x"
93-
myversion = "0.85"
93+
myversion = "0.87"
9494

9595

9696
#--- Debugging functions ---
@@ -188,25 +188,44 @@ def process_hextype(hexaddr):
188188
return hexaddr.replace("0x","#")
189189
return hexaddr
190190

191-
def build_strings_from_binary_data(binary_data):
192-
strings = []
193-
current_string = []
191+
# def build_strings_from_binary_data(binary_data):
192+
# strings = []
193+
# current_string = []
194+
#
195+
# for byte in binary_data:
196+
# if is_alphanumeric(byte):
197+
# current_string.append(chr(byte))
198+
# elif is_terminator(byte):
199+
# if current_string:
200+
# current_string.append(decode_terminator(byte))
201+
# strings.append(''.join(current_string))
202+
# current_string = []
203+
#
204+
# # Append the last string if it exists
205+
# if current_string:
206+
# strings.append(''.join(current_string))
207+
#
208+
# # return strings
209+
# return (''.join(strings))
194210

195-
for byte in binary_data:
196-
if is_alphanumeric(byte):
197-
current_string.append(chr(byte))
198-
elif is_terminator(byte):
199-
if current_string:
200-
current_string.append(decode_terminator(byte))
201-
strings.append(''.join(current_string))
202-
current_string = []
203211

204-
# Append the last string if it exists
205-
if current_string:
206-
strings.append(''.join(current_string))
212+
def build_strings_from_binary_data(binary_data, min_length=3):
213+
"""
214+
Searches binary data for ASCII strings of at least min_length and returns a list of found strings.
207215
208-
# return strings
209-
return (''.join(strings))
216+
Args:
217+
binary_data (bytes): The binary data to search.
218+
min_length (int): The minimum length of ASCII string to find. Default is 3.
219+
220+
Returns:
221+
list[str]: List of ASCII strings found in the binary data.
222+
"""
223+
# Regular expression to match runs of printable ASCII characters
224+
# print(f"\n{len(binary_data)}")
225+
pattern = rb'[\x20-\x7E]{%d,}' % min_length
226+
matches = re.findall(pattern, binary_data)
227+
# Decode bytes to string, ignoring errors
228+
return [m.decode('ascii', errors='ignore') for m in matches]
210229

211230
def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=50, fill='█', print_end="\r"):
212231
"""
@@ -812,6 +831,7 @@ def lookup_label(addr, prettyprint=""):
812831
Returns:
813832
Formatted String
814833
"""
834+
global extern_labels
815835
if not is_in_code(addr):
816836
debug("-->Not in code")
817837
if addr in extern_labels:
@@ -1198,6 +1218,13 @@ def findstring(memstart, memend):
11981218
code_snapshot = bytearray(8)
11991219
loc = 0
12001220

1221+
if args.labelsfile:
1222+
print(f"Loading labels file: {args.labelsfile}... ",end="")
1223+
load_labels(args.labelsfile)
1224+
print("Done!",end="")
1225+
if args.quiet:
1226+
print("\n")
1227+
12011228
# dump_code_array()
12021229
if args.templatefile is not None:
12031230
print(f"Loading template file: {args.templatefile}...",end="")
@@ -1332,27 +1359,22 @@ def findstring(memstart, memend):
13321359
# Print the used external EQUs (with nice formatting)
13331360
# First find the longest label
13341361

1335-
if args.labelsfile:
1336-
print(f"Loading labels file: {args.labelsfile}... ",end="")
1337-
load_labels(args.labelsfile)
1338-
print("Done!",end="")
1339-
if args.quiet:
1340-
print("\n")
1341-
13421362
maxlen=0
13431363
for loop in extern_labels:
13441364
debug(f'{extern_labels[loop][0]} called {extern_labels[loop][1]} times')
13451365
if extern_labels[loop][1]>0:
13461366
if len(extern_labels[loop][0])>maxlen:
13471367
maxlen=len(extern_labels[loop][0])
1348-
# print(f'{extern_labels[loop][0]} equ {hex(loop)}')
13491368

13501369
do_write("; Define labels for external calls")
1351-
# Now print the labels.
1370+
1371+
# Now print the labels, but only those that were called.
13521372
for loop in extern_labels:
1373+
# print(f'{extern_labels[loop][0]} called {extern_labels[loop][1]} times')
13531374
if extern_labels[loop][1]>0:
13541375
do_write(f'{extern_labels[loop][0].ljust(maxlen)} equ {hex(loop)}')
13551376
do_write("\n\n")
1377+
13561378
# Print the org statement
13571379
program_counter=min(code)
13581380
if args.style == "asm":
@@ -1472,7 +1494,7 @@ def findstring(memstart, memend):
14721494
# print("-->", hex(program_counter),b,a)
14731495
code_output(orig,f'DEFB {a}',list_address,f'{addcomment}{hexstyle}{orig:x} to {hexstyle}{orig+len(a)-2:x}')
14741496
# print(f'Bump 2 {hex(program_counter)}-->{hex(program_counter+len(a)-2)}')
1475-
program_counter += 1 #len(a)-1
1497+
program_counter += len(a)-1
14761498
# program_counter=program_counter+len(b)
14771499
# str_locations[program_counter]
14781500
else:
@@ -1510,19 +1532,20 @@ def findstring(memstart, memend):
15101532
cnt=program_counter
15111533
result=build_strings_from_binary_data(tmp_array)
15121534
# print("-->",result)
1535+
# print(f"{len(result)}")
15131536
# result=result.replace('"', '",34,"').replace("\\", '", 0x5c, "')
15141537
# print("---->",result,code[src_array_index][1],---code[src_array_index][2],"\n")
15151538
# program_counter=program_counter+len(result)
15161539
str_len=len(result)
1517-
result=result.replace('"', '",34,"').replace("\\", '", 0x5c, "')
15181540
# print("-->",result,(identified(program_counter) == "S"),is_terminator(code[program_counter][0]))
15191541
# dump_code_array("-- term -->",program_counter,)
15201542
# print("-->",result)
15211543
#--------------------------------
15221544
#FIXME: Something in here is breaking labels after a string, probably one of the increments
15231545
# So it's adding code area to the string if the string isn't terminated, but the area is marked as code.
15241546
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1525-
if result!="":
1547+
if str_len>0:
1548+
result=result[0].replace('"', '",34,"').replace("\\", '", 0x5c, "')
15261549
# if 0xf77b < program_counter < 0xf79c:
15271550
# print("----> 1-",hex(program_counter),identified(program_counter))
15281551
# program_counter=program_counter+str_len
@@ -1532,17 +1555,21 @@ def findstring(memstart, memend):
15321555
else:
15331556
addcomment=""
15341557

1535-
code_output(program_counter,f'DEFB "{result}{decode_terminator(code[program_counter+str_len][0])}',list_address,f'{addcomment}{hexstyle}{program_counter:x} to {hexstyle}{(program_counter+str_len+1):x}')
1558+
code_output(program_counter,f'DEFB "{result}{decode_terminator(code[program_counter+str_len][0])}',list_address,f'{addcomment}{hexstyle}{program_counter:x} to {hexstyle}{(program_counter+str_len):x}')
15361559
# Bump for terminator
15371560
# print(f'Bump 4 {hex(program_counter)}-->{hex(program_counter+str_len)}')
1538-
program_counter +=str_len+1
1561+
program_counter +=len(result)+1
15391562
else:
15401563
#Probably never called, but better safe etc etc
15411564
code_output(program_counter,f'DEFB "{result}"',list_address)
15421565
elif (identified(program_counter) == "S") and (code[program_counter][0]>0x80) and not is_terminator(code[program_counter][0]):
15431566
# if 0xf77b < program_counter < 0xf79c:
15441567
# print("----> 2 -",hex(program_counter),identified(program_counter))
1545-
result=result+decode_terminator(code[program_counter][0]).replace('",',"")
1568+
#Issue #30: This is part of the issue, but not sure why yet.
1569+
# result=result+decode_terminator(code[program_counter][0]).replace('",',"")
1570+
1571+
result=hex(code[program_counter][0])
1572+
15461573
code_output(program_counter-str_len,f'DEFB {result}',list_address)
15471574
# print(f'Bump 5 {hex(program_counter)}-->{hex(program_counter+1)}')
15481575
program_counter +=1 #str_len

0 commit comments

Comments
 (0)