Commit 020a25f

Fix ckpt convert bug (#9521)
* refine log
* refine
* refine
* refine
1 parent 2985f90 commit 020a25f

2 files changed: +2 −3 lines changed

paddlenlp/trainer/utils/ckpt_converter.py

Lines changed: 2 additions & 2 deletions
@@ -270,7 +270,7 @@ def gen_metadata_and_prepare_source_state_dict(self):
         malloc_size = 0
         for opt_state_name, opt_state_value in optimizer_state_dict.items():
             malloc_size += opt_state_value.numel() * opt_state_value.element_size()
-        malloc_size = malloc_size.numpy() / 2**20
+        malloc_size = malloc_size / 2**20
         logger.debug(f"{malloc_size} MB of GPU memory were allocated.")

         # merge sharding

@@ -555,7 +555,7 @@ def load_state_dict_and_rename(self):
         for k, v in state_dict.items():
             memory_size += v.numel() * v.element_size()

-        memory_size = memory_size.numpy() / 2**20
+        memory_size = memory_size / 2**20
         logger.debug(
             f"The current rank has finished loading the checkpoint file and has allocated {memory_size} MB of GPU memory."
         )
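Both hunks apply the same fix: the byte count accumulated via numel() * element_size() is handled as a plain Python number, so the stray .numpy() call is dropped before the division that converts bytes to MB. Below is a minimal, self-contained sketch of the fixed pattern; FakeTensor is a hypothetical stand-in (not PaddleNLP or Paddle code) that only exposes numel() and element_size() so the example runs without the framework.

# Minimal sketch of the fixed size-accounting pattern.
# FakeTensor is a hypothetical stand-in for a framework tensor.
from dataclasses import dataclass


@dataclass
class FakeTensor:
    num_elements: int
    bytes_per_element: int

    def numel(self) -> int:
        return self.num_elements

    def element_size(self) -> int:
        return self.bytes_per_element


def allocated_mb(state_dict: dict) -> float:
    """Sum tensor sizes in bytes and convert to MB, mirroring the fixed code."""
    memory_size = 0  # plain Python int, so no .numpy() call is needed
    for _, v in state_dict.items():
        memory_size += v.numel() * v.element_size()
    return memory_size / 2**20


if __name__ == "__main__":
    state = {
        "linear.weight": FakeTensor(num_elements=1024 * 1024, bytes_per_element=2),  # bf16
        "linear.bias": FakeTensor(num_elements=1024, bytes_per_element=2),
    }
    print(f"{allocated_mb(state)} MB of GPU memory were allocated.")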

scripts/distribute/ci_case_auto.sh

Lines changed: 0 additions & 1 deletion
@@ -95,7 +95,6 @@ function llama_case_list_auto() {
     llama_dy2st_auto_bs4_bf16_DP1-MP1-PP4-SD2
     llama_align_dygraph_dy2st_auto_bs2_bf16_DP2-MP1-PP1
     llama_pir_auto_fuse_ffn_attention_qkv_MP2
-    llama_convert_hybrid_ckpt_to_auto_parallel_bs2_fp32_DP2-MP1-PP1
     llama_align_dygraph_dy2st_pir_auto_bs2_bf16_DP2-MP2-PP1-SP
     llama_align_dygraph_dy2st_pir_auto_bs2_bf16_DP2-MP2-PP2-SP
     llama_align_dygraph_dy2st_pir_auto_grad_merge_bs2_fp32_DP1-MP1-PP1
