diff --git a/README.md b/README.md
index fb443dc..0208fc0 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+test
+test
 # ***NeuronBlocks*** - Building Your NLP DNN Models Like Playing Lego
 
 [![language](https://img.shields.io/badge/language-en%20%7C%20中文-brightgreen.svg)](#language-supported)
diff --git a/problem.py b/problem.py
index 2610da8..398966c 100644
--- a/problem.py
+++ b/problem.py
@@ -710,7 +710,9 @@ def encode(self, data_path, file_columns, input_types, file_with_col_header, obj
             bpe_encoder = None
 
         progress = self.get_data_generator_from_file([data_path], file_with_col_header)
+
         encoder_generator = self.encode_data_multi_processor(progress, cpu_num_workers,
+
                                             file_columns, input_types, object_inputs, answer_column_name, min_sentence_len, extra_feature, max_lengths, fixed_lengths, file_format, bpe_encoder=bpe_encoder)
diff --git a/train.py b/train.py
index 5173914..ea58a2a 100644
--- a/train.py
+++ b/train.py
@@ -74,6 +74,7 @@ def load(self, conf, problem, emb_matrix):
         # load dictionary when (not finetune) and (cache valid)
         if not conf.pretrained_model_path and not self.dictionary_invalid:
             problem.load_problem(conf.problem_path)
+
             if not self.embedding_invalid:
                 emb_matrix = np.array(load_from_pkl(conf.emb_pkl_path))
             logging.info('[Cache] loading dictionary successfully')
@@ -87,6 +88,7 @@ def save(self, conf, params, problem, emb_matrix):
             os.makedirs(conf.cache_dir)
         shutil.copy(params.conf_path, os.path.join(conf.cache_dir, 'conf_cache.json'))
         if self.dictionary_invalid:
+
             if conf.mode == 'philly' and conf.emb_pkl_path.startswith('/hdfs/'):
                 with HDFSDirectTransferer(conf.problem_path, with_hdfs_command=True) as transferer:
                     transferer.pkl_dump(problem.export_problem(conf.problem_path, ret_without_save=True))
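
Note on the `load` hunk in train.py: when the cache is valid, the embedding matrix is read back from a pickle at `conf.emb_pkl_path` and wrapped in `np.array`. Below is a minimal sketch of that pickle round trip, using standalone stand-ins for NeuronBlocks' `load_from_pkl` helper and a hypothetical cache path, not the repository's actual code:

```python
import pickle

import numpy as np


def dump_to_pkl(obj, pkl_path):
    # Persist an object (e.g. the embedding matrix) to disk as a pickle.
    with open(pkl_path, 'wb') as fout:
        pickle.dump(obj, fout, protocol=pickle.HIGHEST_PROTOCOL)


def load_from_pkl(pkl_path):
    # Read the pickled object back; train.py wraps the result in np.array.
    with open(pkl_path, 'rb') as fin:
        return pickle.load(fin)


if __name__ == '__main__':
    emb_pkl_path = 'emb_cache.pkl'           # hypothetical path, stands in for conf.emb_pkl_path
    emb_matrix = np.random.rand(1000, 300)   # stand-in embedding matrix
    dump_to_pkl(emb_matrix, emb_pkl_path)
    restored = np.array(load_from_pkl(emb_pkl_path))
    assert restored.shape == emb_matrix.shape
```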