1313import mmap
1414import numpy
1515import os
16- import sys
1716import typing
1817import zlib
1918from struct import pack , unpack
@@ -51,7 +50,8 @@ class FileArchive:
5150 start_recovery_tag = 0xAA55AA55
5251 end_recovery_tag = 0x55AA55AA
5352
54- def __init__ (self , filename , must_exists = False ):
53+ def __init__ (self , filename , must_exists = False , encoding = "ascii" ):
54+ self .encoding = encoding
5555
5656 self .ft = {} # type: typing.Dict[str,FileInfo]
5757 if os .path .exists (filename ):
@@ -182,12 +182,12 @@ def read_v(self, typ, size):
182182 return res
183183
184184 # write routines
def write_str(self, s, enc="ascii"):
    """
    Encode ``s`` with ``enc`` and write the resulting bytes to ``self.f``.

    No length prefix or terminator is written here; only the encoded bytes.

    :param str s: text to write
    :param str enc: codec name passed to ``str.encode``
    :return: whatever ``self.f.write`` returns for the encoded bytes
    :rtype: int
    """
    # Encode exactly once. Packing with "%ds" % len(data) yields the bytes
    # unchanged, so they can be written directly.
    data = s.encode(enc)
    return self.f.write(data)
191191
192192 def write_char (self , i ):
193193 """
@@ -256,7 +256,7 @@ def readFileInfoTable(self):
256256 return
257257 for i in range (count ):
258258 str_len = self .read_u32 ()
259- name = self .read_str (str_len )
259+ name = self .read_str (str_len , self . encoding )
260260 pos = self .read_u64 ()
261261 size = self .read_u32 ()
262262 comp = self .read_u32 ()
@@ -271,8 +271,8 @@ def writeFileInfoTable(self):
271271 self .write_u32 (len (self .ft ))
272272
273273 for fi in self .ft .values ():
274- self .write_u32 (len (fi .name ))
275- self .write_str (fi .name )
274+ self .write_u32 (len (fi .name . encode ( self . encoding ) ))
275+ self .write_str (fi .name , self . encoding )
276276 self .write_u64 (fi .pos )
277277 self .write_u32 (fi .size )
278278 self .write_u32 (fi .compressed )
@@ -293,7 +293,7 @@ def scanArchive(self):
293293 continue
294294
295295 fn_len = self .read_u32 ()
296- name = self .read_str (fn_len )
296+ name = self .read_str (fn_len , self . encoding )
297297 pos = self .f .tell ()
298298 size = self .read_u32 ()
299299 comp = self .read_u32 ()
@@ -322,7 +322,7 @@ def _raw_read(self, size, typ):
322322 """
323323
324324 if typ == "str" :
325- return self .read_str (size )
325+ return self .read_str (size , self . encoding )
326326
327327 elif typ == "feat" :
328328 type_len = self .read_U32 ()
@@ -496,8 +496,8 @@ def addFeatureCache(self, filename, features, times):
496496 :param times:
497497 """
498498 self .write_U32 (self .start_recovery_tag )
499- self .write_u32 (len (filename ))
500- self .write_str (filename )
499+ self .write_u32 (len (filename . encode ( self . encoding ) ))
500+ self .write_str (filename , self . encoding )
501501 pos = self .f .tell ()
502502 if len (features ) > 0 :
503503 dim = len (features [0 ])
@@ -542,8 +542,8 @@ def addAttributes(self, filename, dim, duration):
542542 ) % (dim , duration )
543543 self .write_U32 (self .start_recovery_tag )
544544 filename = "%s.attribs" % filename
545- self .write_u32 (len (filename ))
546- self .write_str (filename )
545+ self .write_u32 (len (filename . encode ( self . encoding ) ))
546+ self .write_str (filename , self . encoding )
547547 pos = self .f .tell ()
548548 size = len (data )
549549 self .write_u32 (size )
@@ -559,17 +559,18 @@ class FileArchiveBundle:
559559 File archive bundle.
560560 """
561561
562- def __init__ (self , filename ):
562+ def __init__ (self , filename , encoding = "ascii" ):
563563 """
564564 :param str filename: .bundle file
565+ :param str encoding: encoding used in the files
565566 """
566567 # filename -> FileArchive
567568 self .archives = {} # type: typing.Dict[str,FileArchive]
568569 # archive content file -> FileArchive
569570 self .files = {} # type: typing.Dict[str,FileArchive]
570571 self ._short_seg_names = {}
571572 for line in open (filename ).read ().splitlines ():
572- self .archives [line ] = a = FileArchive (line , must_exists = True )
573+ self .archives [line ] = a = FileArchive (line , must_exists = True , encoding = encoding )
573574 for f in a .ft .keys ():
574575 self .files [f ] = a
575576 # noinspection PyProtectedMember
@@ -616,17 +617,18 @@ def setAllophones(self, filename):
616617 a .setAllophones (filename )
617618
618619
def open_file_archive(archive_filename, must_exists=True, encoding="ascii"):
    """
    Open either a single archive or a bundle, chosen by the file name suffix.

    :param str archive_filename: names ending in ".bundle" are opened as a
      FileArchiveBundle, everything else as a FileArchive
    :param bool must_exists: forwarded to FileArchive; asserted to be true for bundles
    :param str encoding: forwarded unchanged to the object that gets constructed
    :rtype: FileArchiveBundle|FileArchive
    """
    is_bundle = archive_filename.endswith(".bundle")
    if not is_bundle:
        return FileArchive(archive_filename, must_exists=must_exists, encoding=encoding)
    # The bundle constructor takes no must_exists argument, so it is only asserted here.
    assert must_exists
    return FileArchiveBundle(archive_filename, encoding=encoding)
630632
631633
632634def is_rasr_cache_file (filename ):
0 commit comments