@@ -23,7 +23,7 @@ def __init__(self, opt=None):
         self.opt = aux.get_opt_as_proto(opt or {}, IoUtilsConfigProto)
         self.logger = aux.get_logger("ioutils", level=self.opt.py_log_level)
 
-        tmp = tempfile.NamedTemporaryFile(mode='w')
+        tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
         opt_content = json.dumps(aux.proto_to_dict(self.opt), indent=2)
         tmp.write(opt_content)
         tmp.close()
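Note on the first hunk: with the default delete=True, tempfile.NamedTemporaryFile removes the file as soon as close() is called, so the serialized options at tmp.name would already be gone when a later consumer tries to open that path; delete=False keeps the file on disk and makes the caller responsible for cleaning it up. A minimal standalone sketch of the difference (the option dict here is illustrative, not the real IoUtilsConfigProto contents):

import json
import os
import tempfile

# Default delete=True: the file vanishes at close(), so the path is useless afterwards.
tmp = tempfile.NamedTemporaryFile(mode='w')
tmp.write(json.dumps({"py_log_level": 2}, indent=2))
tmp.close()
print(os.path.exists(tmp.name))   # False

# delete=False: the file survives close() and tmp.name stays readable.
tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
tmp.write(json.dumps({"py_log_level": 2}, indent=2))
tmp.close()
print(os.path.exists(tmp.name))   # True
os.remove(tmp.name)               # explicit cleanup now falls on the caller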
@@ -49,7 +49,8 @@ def load_stream_vocab(self, filepath, min_count, keys_path):
         self.obj.get_word_vocab(min_count, keys_path)
 
     def convert_stream_to_h5(self, filepath, min_count, out_dir,
-                             chunk_indices=10000):
+                             chunk_indices=10000, seed=777):
+        np.random.seed(seed)
         os.makedirs(out_dir, exist_ok=True)
         keys_path = pjoin(out_dir, "keys.txt")
         token_path = pjoin(out_dir, "token.h5")
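Note on the second hunk: np.random.seed(seed) pins NumPy's legacy global RNG before the conversion loop, so the np.random.uniform draws written into the vali dataset below are reproducible for a given seed (777 by default). A small sketch of the guarantee this buys, assuming numpy is already imported as np in the module:

import numpy as np

np.random.seed(777)
a = np.random.uniform(size=(5,)).astype(np.float32)
np.random.seed(777)
b = np.random.uniform(size=(5,)).astype(np.float32)
assert np.array_equal(a, b)   # same seed -> identical validation draws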
@@ -86,7 +87,7 @@ def convert_stream_to_h5(self, filepath, min_count, out_dir,
             cols[offset:offset + data_size] = _cols
             vali.resize((offset + data_size,))
             vali[offset:offset + data_size] = \
-                np.uniform(size=(data_size,)).astype(np.float32)
+                np.random.uniform(size=(data_size,)).astype(np.float32)
             indptr[processed:processed + read_lines] = _indptr + offset
             offset += data_size
             processed += read_lines
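Note on the last hunk: NumPy has no top-level np.uniform, so the old line would raise AttributeError the first time a chunk was written; np.random.uniform is the intended call and fills vali with float32 draws in [0, 1). A minimal sketch of the chunked-append pattern around it, assuming (not confirmed by this diff) that vali is an h5py dataset created with maxshape=(None,) so it can grow per chunk; the file name and chunk data are placeholders:

import h5py
import numpy as np

with h5py.File("example.h5", "w") as f:
    vali = f.create_dataset("vali", shape=(0,), maxshape=(None,), dtype=np.float32)
    offset = 0
    for _cols in (np.arange(3), np.arange(4)):   # stand-in chunk data
        data_size = len(_cols)
        vali.resize((offset + data_size,))       # grow by one chunk
        vali[offset:offset + data_size] = \
            np.random.uniform(size=(data_size,)).astype(np.float32)
        offset += data_size
    print(vali[:])   # all accumulated draws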