# Record for one directory: its name, a `cd` field, the entry count
# (`num_entries`) and `files`, which the listing/extraction code iterates
# to reach the individual file records.
1515DirEntry = collections .namedtuple ('DirEntry' , ['name' , 'cd' , 'num_entries' , 'files' ])
# Record for one file. `name` is held as bytes (callers decode it before
# printing or building a path). When `comp_length` is 0 the callers report
# `length` in its place. `timedate1` and `timedate2` are combined by callers
# as `timedate1 | (timedate2 << 64)` to form a timestamp; the two defaults
# apply to these last two fields, so both are 0 when omitted.
1616FileEntry = collections .namedtuple ('FileEntry' , ['name' , 'offset' , 'length' , 'comp_length' , 'flags' , 'timedate1' , 'timedate2' ], defaults = (0 , 0 ))
1717
def safe_decode(b):
    """Decode raw name bytes to ``str`` without ever raising.

    Candidate encodings are tried from strictest to most permissive:

    1. UTF-8 -- rejects almost every byte sequence that was not really
       written as UTF-8, so a successful decode is a strong signal.
    2. cp1252 -- the Windows Western code page; rejects only the five byte
       values it leaves undefined (0x81, 0x8D, 0x8F, 0x90, 0x9D).
    3. latin-1 -- maps every byte value to a code point, so it always
       succeeds and serves as the final fallback.

    The order matters: a permissive codec placed before a strict one makes
    the strict one unreachable and turns valid multi-byte UTF-8 names into
    mojibake.

    Args:
        b: The encoded name as ``bytes``.

    Returns:
        The decoded text. Never raises ``UnicodeDecodeError``.
    """
    for enc in ("utf-8", "cp1252"):
        try:
            return b.decode(enc)
        except UnicodeDecodeError:
            continue
    # latin-1 is total over all 256 byte values, so this cannot fail.
    return b.decode("latin-1")
25+
1826def read_struct (f , fmt , constructor ):
1927 if type (fmt ) is str :
2028 fmt = struct .Struct (fmt )
@@ -101,7 +109,7 @@ def dcp_list(options, offset=0):
101109 for fl in dirent .files :
102110 print ("{0:<8}\t @{1:<8}\t {2:<8}\t {3}\t {4}" .format (
103111 fl .length , fl .offset , fl .length if fl .comp_length == 0 else fl .comp_length ,
104- datetime .fromtimestamp (fl .timedate1 | (fl .timedate2 << 64 )).isoformat (), fl .name . decode ( 'utf-8' )))
112+ datetime .fromtimestamp (fl .timedate1 | (fl .timedate2 << 64 )).isoformat (), safe_decode ( fl .name )))
105113
106114def dcp_extract (options , offset = 0 ):
107115 header , dirs = read_headers (options .input , offset )
@@ -114,8 +122,8 @@ def dcp_extract(options, offset=0):
114122 for fl in dirent .files :
115123 print ("{0:<8}\t @{1:<8}\t {2:<8}\t {3}\t {4}" .format (
116124 fl .length , fl .offset , fl .length if fl .comp_length == 0 else fl .comp_length ,
117- datetime .fromtimestamp (fl .timedate1 | (fl .timedate2 << 64 )).isoformat (), fl .name . decode ( 'utf-8' )))
118- output_file = output_int / pathlib .Path (fl .name . decode ( 'utf-8' ).replace ('\\ ' , '/' ))
125+ datetime .fromtimestamp (fl .timedate1 | (fl .timedate2 << 64 )).isoformat (), safe_decode ( fl .name )))
126+ output_file = output_int / pathlib .Path (safe_decode ( fl .name ).replace ('\\ ' , '/' ))
119127 output_file .parent .mkdir (parents = True , exist_ok = True )
120128
121129 with output_file .open ('wb' ) as output_f :
0 commit comments