@@ -188,25 +188,44 @@ def process_hextype(hexaddr):
188188 return hexaddr .replace ("0x" ,"#" )
189189 return hexaddr
190190
def build_strings_from_binary_data(binary_data):
    """
    Collect the text fragments found in binary_data and return them joined.

    Bytes accepted by is_alphanumeric() are accumulated into the fragment
    being built.  A byte accepted by is_terminator() closes a non-empty
    fragment, with decode_terminator(byte) appended to it.  Any other byte
    is skipped without closing the fragment.

    Args:
        binary_data: Iterable of byte values to scan.

    Returns:
        str: All fragments concatenated in the order they were found.
    """
    fragments = []
    pending = []

    for value in binary_data:
        if is_alphanumeric(value):
            pending.append(chr(value))
            continue
        # A terminator only matters when a fragment is currently open.
        if not is_terminator(value) or not pending:
            continue
        pending.append(decode_terminator(value))
        fragments.append(''.join(pending))
        pending = []

    # Keep a trailing fragment that never reached a terminator.
    if pending:
        fragments.append(''.join(pending))

    return ''.join(fragments)
191+ # def build_strings_from_binary_data(binary_data):
192+ # strings = []
193+ # current_string = []
194+ #
195+ # for byte in binary_data:
196+ # if is_alphanumeric(byte):
197+ # current_string.append(chr(byte))
198+ # elif is_terminator(byte):
199+ # if current_string:
200+ # current_string.append(decode_terminator(byte))
201+ # strings.append(''.join(current_string))
202+ # current_string = []
203+ #
204+ # # Append the last string if it exists
205+ # if current_string:
206+ # strings.append(''.join(current_string))
207+ #
208+ # # return strings
209+ # return (''.join(strings))
210+
211+
def build_strings_from_binary_data(binary_data, min_length=3):
    """
    Find runs of printable ASCII characters in binary data.

    A run is a sequence of consecutive bytes in the range 0x20-0x7E
    (space through tilde) that is at least min_length bytes long.

    Args:
        binary_data (bytes | bytearray | memoryview | iterable of int):
            The binary data to search.  Anything that is not already
            bytes-like is converted with bytes(), so a list of byte values
            is accepted as well.
        min_length (int): The minimum length of ASCII string to find.
            Default is 3.  Values below 1 are treated as 1 so that empty
            matches are never reported.

    Returns:
        list[str]: The ASCII strings found, in order of appearance.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    # re can only scan bytes-like objects with a bytes pattern.
    if not isinstance(binary_data, (bytes, bytearray, memoryview)):
        binary_data = bytes(binary_data)

    # A quantifier of {0,} would match the empty string at every position.
    min_length = max(1, int(min_length))

    # Regular expression to match runs of printable ASCII characters
    pattern = rb'[\x20-\x7E]{%d,}' % min_length

    # Every match consists solely of ASCII bytes, so decoding cannot fail.
    return [run.decode('ascii') for run in re.findall(pattern, binary_data)]
210229
211230def print_progress_bar (iteration , total , prefix = '' , suffix = '' , decimals = 1 , length = 50 , fill = '█' , print_end = "\r " ):
212231 """
@@ -1465,22 +1484,22 @@ def findstring(memstart, memend):
14651484 # print("3")
14661485 # found terminator, output it
14671486 # known_string=f'DEFB {b}{decode_terminator(code[m][0])}'
1468- code_output (orig ,f'x1 DEFB { b } { decode_terminator (code [m ][0 ])} ' ,list_address ,f'{ addcomment } { hexstyle } { orig :x} to { hexstyle } { (orig + len (a )+ 1 ):x} ' )
1487+ code_output (orig ,f'DEFB { b } { decode_terminator (code [m ][0 ])} ' ,list_address ,f'{ addcomment } { hexstyle } { orig :x} to { hexstyle } { (orig + len (a )+ 1 ):x} ' )
14691488 # print(f'Bump 1 {hex(program_counter)}-->{hex(program_counter+len(a)-1)}')
14701489 program_counter += len (a )- 1
14711490 elif identified (m )== "S" and not is_terminator (code [m ][0 ]):
14721491 # print("------>>>> 4")
14731492 # Causing issues with some string endings
14741493 #No terminator, just dump the string
14751494 # print("-->", hex(program_counter),b,a)
1476- code_output (orig ,f'x2 DEFB { a } ' ,list_address ,f'{ addcomment } { hexstyle } { orig :x} to { hexstyle } { orig + len (a )- 2 :x} ' )
1495+ code_output (orig ,f'DEFB { a } ' ,list_address ,f'{ addcomment } { hexstyle } { orig :x} to { hexstyle } { orig + len (a )- 2 :x} ' )
14771496 # print(f'Bump 2 {hex(program_counter)}-->{hex(program_counter+len(a)-2)}')
14781497 program_counter += len (a )- 1
14791498 # program_counter=program_counter+len(b)
14801499 # str_locations[program_counter]
14811500 else :
14821501 # print("5")
1483- code_output (orig ,f'x3 DEFB "{ d } ' ,list_address ,f'{ addcomment } { hexstyle } { orig :x} to { hexstyle } { orig + len (a )- 2 :x} ' )
1502+ code_output (orig ,f'DEFB "{ d } ' ,list_address ,f'{ addcomment } { hexstyle } { orig :x} to { hexstyle } { orig + len (a )- 2 :x} ' )
14841503 # print(f'Bump 3 {hex(program_counter)}-->{hex(program_counter+len(a)-2)}')
14851504 program_counter += len (a )- 2
14861505 # print(hex(program_counter))
@@ -1513,19 +1532,20 @@ def findstring(memstart, memend):
15131532 cnt = program_counter
15141533 result = build_strings_from_binary_data (tmp_array )
15151534 # print("-->",result)
1535+ # print(f"{len(result)}")
15161536 # result=result.replace('"', '",34,"').replace("\\", '", 0x5c, "')
15171537 # print("---->",result,code[src_array_index][1],---code[src_array_index][2],"\n")
15181538 # program_counter=program_counter+len(result)
15191539 str_len = len (result )
1520- result = result .replace ('"' , '",34,"' ).replace ("\\ " , '", 0x5c, "' )
15211540 # print("-->",result,(identified(program_counter) == "S"),is_terminator(code[program_counter][0]))
15221541 # dump_code_array("-- term -->",program_counter,)
15231542 # print("-->",result)
15241543 #--------------------------------
15251544 #FIXME: Something in here is breaking labels after a string, probably one of the increments
15261545 # So its adding code area to the string if the string isn't terminated, but the area is marked as code.
15271546 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1528- if result != "" :
1547+ if str_len > 0 :
1548+ result = result [0 ].replace ('"' , '",34,"' ).replace ("\\ " , '", 0x5c, "' )
15291549 # if 0xf77b < program_counter < 0xf79c:
15301550 # print("----> 1-",hex(program_counter),identified(program_counter))
15311551 # program_counter=program_counter+str_len
@@ -1535,31 +1555,34 @@ def findstring(memstart, memend):
15351555 else :
15361556 addcomment = ""
15371557
1538- code_output (program_counter ,f'x4 DEFB "{ result } { decode_terminator (code [program_counter + str_len ][0 ])} ' ,list_address ,f'{ addcomment } { hexstyle } { program_counter :x} to { hexstyle } { (program_counter + str_len + 1 ):x} ' )
1558+ code_output (program_counter ,f'DEFB "{ result } { decode_terminator (code [program_counter + str_len ][0 ])} ' ,list_address ,f'{ addcomment } { hexstyle } { program_counter :x} to { hexstyle } { (program_counter + str_len ):x} ' )
15391559 # Bump for terminator
15401560 # print(f'Bump 4 {hex(program_counter)}-->{hex(program_counter+str_len)}')
1541- program_counter += str_len + 1
1561+ program_counter += len ( result ) + 1
15421562 else :
15431563 #Probably never called, but better safe etc etc
1544- code_output (program_counter ,f'x5 DEFB "{ result } "' ,list_address )
1564+ code_output (program_counter ,f'DEFB "{ result } "' ,list_address )
15451565 elif (identified (program_counter ) == "S" ) and (code [program_counter ][0 ]> 0x80 ) and not is_terminator (code [program_counter ][0 ]):
15461566 # if 0xf77b < program_counter < 0xf79c:
15471567 # print("----> 2 -",hex(program_counter),identified(program_counter))
15481568 #Issue #30: This is part of the issue, but not sure why yet.
1549- result = result + decode_terminator (code [program_counter ][0 ]).replace ('",' ,"" )
1550- code_output (program_counter - str_len ,f'x6 DEFB { result } ' ,list_address )
1569+ # result=result+decode_terminator(code[program_counter][0]).replace('",',"")
1570+
1571+ result = hex (code [program_counter ][0 ])
1572+
1573+ code_output (program_counter - str_len ,f'DEFB { result } ' ,list_address )
15511574 # print(f'Bump 5 {hex(program_counter)}-->{hex(program_counter+1)}')
15521575 program_counter += 1 #str_len
15531576 else :
15541577 # print("----> 3 -",hex(program_counter),identified(program_counter))
1555- code_output (program_counter - str_len ,f'x7 DEFB { hexstyle } { (code [program_counter ][0 ]):x} ' ,list_address )
1578+ code_output (program_counter - str_len ,f'DEFB { hexstyle } { (code [program_counter ][0 ]):x} ' ,list_address )
15561579 # print(f'Bump 6 {hex(program_counter)}-->{hex(program_counter+1)}')
15571580 program_counter += 1
15581581 # elif identified(program_counter) == "D" and (program_counter in str_locations) and not stay_in_code:
15591582 elif identified (program_counter ) == "D" and (program_counter in str_locations ):
15601583 #Its a string!
15611584 code_output (
1562- program_counter , "x8 DEFB " + str_locations [program_counter ], list_address
1585+ program_counter , "DEFB " + str_locations [program_counter ], list_address
15631586 )
15641587 # print(f'Bump 7 {hex(program_counter)}-->{hex(program_counter+str_sizes[program_counter])}')
15651588 program_counter += str_sizes [program_counter ]
@@ -1581,7 +1604,7 @@ def findstring(memstart, memend):
15811604 #BUG: Causes defb 01 01 on -c 0
15821605 if commentlevel == 0 :
15831606 out_tmp = "; " + out_tmp
1584- code_output (program_counter , f"x9 DEFB { hexstyle } { tmp :x} " , list_address , f'{ out_tmp } ' )
1607+ code_output (program_counter , f"DEFB { hexstyle } { tmp :x} " , list_address , f'{ out_tmp } ' )
15851608 # debug("PC Bump")
15861609 program_counter += 1 #FIXME - tripping PC too much?
15871610 elif identified (program_counter ) == "Dw" :
0 commit comments