
Commit c8429d3

[cherry-pick 2.2] fix data parallel when VOCAB var in program (#37546)

* fix data parallel when VOCAB var in program
* fix ci coverage

1 parent 824c4ef commit c8429d3

3 files changed: +32 -0 lines changed


python/paddle/fluid/dygraph/parallel.py

Lines changed: 3 additions & 0 deletions

@@ -365,6 +365,9 @@ def sync_params_buffers(model,
         if getattr(param, "no_sync", False):
             continue
 
+        if param.type == core.VarDesc.VarType.VOCAB:
+            continue
+
         model_vars.append(param.detach())
     if len(model_vars) == 0:
         return
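The guard above keeps VOCAB-typed variables out of the list of tensors that sync_params_buffers broadcasts across ranks. A VOCAB variable holds a string-to-id vocabulary map (as used by the faster tokenizer) rather than dense numeric data, so it cannot be flattened and broadcast like an ordinary parameter or buffer. Below is a minimal standalone sketch of that filtering rule: collect_broadcastable_vars is a hypothetical helper, and iterating model.state_dict() is an assumption about how the surrounding loop obtains parameters and buffers; only the param.type check against core.VarDesc.VarType.VOCAB comes from the diff above.

# Hypothetical sketch of the filtering rule added in this commit.
# Only the VOCAB check itself is taken from the diff; the helper name,
# the state_dict() iteration, and the surrounding structure are assumptions.
from paddle.fluid import core


def collect_broadcastable_vars(model):
    """Return the parameters/buffers that would be broadcast at DataParallel init."""
    model_vars = []
    for _, param in model.state_dict().items():
        # Pre-existing behavior: honor an explicit opt-out on the variable.
        if getattr(param, "no_sync", False):
            continue
        # New in this commit: skip VOCAB (string-map) variables, which cannot
        # be broadcast like dense tensors.
        if param.type == core.VarDesc.VarType.VOCAB:
            continue
        model_vars.append(param.detach())
    return model_vars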

python/paddle/fluid/tests/unittests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -554,6 +554,7 @@ py_test_modules(test_imperative_static_runner_mnist MODULES test_imperative_stat
 py_test_modules(test_imperative_static_runner_while MODULES test_imperative_static_runner_while ENVS
     FLAGS_cudnn_deterministic=1)
 set_tests_properties(test_conv2d_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+set_tests_properties(test_faster_tokenizer_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
 set_tests_properties(test_conv2d_op_depthwise_conv PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
 set_tests_properties(test_conv2d_api PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
 set_tests_properties(test_conv_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")

python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py

Lines changed: 28 additions & 0 deletions

@@ -388,6 +388,34 @@ def test_feed_string_var(self):
         exe.run(paddle.static.default_main_program(), feed={'x': self.text})
         paddle.disable_static()
 
+    def test_data_parallel(self):
+        self.max_seq_len = 128
+        self.pad_to_max_seq_len = True
+        self.is_split_into_words = False
+
+        model = paddle.DataParallel(self.faster_tokenizer)
+        input_ids, token_type_ids = model(
+            text=self.text_tensor,
+            do_lower_case=self.bert_tokenizer.do_lower_case,
+            max_seq_len=self.max_seq_len,
+            pad_to_max_seq_len=self.pad_to_max_seq_len,
+            is_split_into_words=self.is_split_into_words)
+        input_ids = input_ids.numpy()
+        token_type_ids = token_type_ids.numpy()
+
+        encoded_inputs = self.bert_tokenizer(
+            self.text,
+            max_seq_len=self.max_seq_len,
+            pad_to_max_seq_len=self.pad_to_max_seq_len,
+            is_split_into_words=self.is_split_into_words)
+        py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape([1, -1])
+        py_token_type_ids = np.array(encoded_inputs[0][
+            "token_type_ids"]).reshape([1, -1])
+        self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01))
+        self.assertTrue(
+            np.allclose(
+                token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
+
 
 if __name__ == '__main__':
     unittest.main()
