Skip to content

Commit b071870

Browse files
authored
[LLM] Add merge script for merge tp and pp. (#6713)
1 parent d2c2285 commit b071870

File tree

1 file changed

+88
-0
lines changed

1 file changed

+88
-0
lines changed

llm/llama/megre_tp_and_pp.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import paddle
18+
19+
from paddlenlp.transformers import LlamaConfig, LlamaForCausalLM
20+
from paddlenlp.utils.log import logger
21+
22+
23+
def merge_pipeline_parallel(tp_degree, pp_degree, path):
    """Collapse pipeline-parallel checkpoint shards into one state dict per tensor-parallel rank.

    For every tensor-parallel rank ``tp`` in ``range(tp_degree)``, the shard
    files ``model_state.tp<tp>_pp<pp>.pdparams`` (both indices zero-padded to
    two digits) are loaded from ``path`` for each ``pp`` in
    ``range(pp_degree)`` and their entries are gathered into a single dict.

    Args:
        tp_degree (int): number of tensor-parallel ranks to iterate over.
        pp_degree (int): number of pipeline-parallel stages to iterate over.
        path (str): directory that holds the sharded ``.pdparams`` files.

    Returns:
        list[dict]: one merged state dict per tensor-parallel rank, in rank
        order. The list is empty when ``tp_degree`` is 0, and each dict is
        empty when ``pp_degree`` is 0.
    """
    tp_state_dict_list = []
    for tp in range(tp_degree):
        tp_state_dict = {}
        for pp in range(pp_degree):
            shard_file = os.path.join(path, f"model_state.tp{tp:0>2d}_pp{pp:0>2d}.pdparams")
            # If a key appears in more than one stage, the later stage's value wins.
            tp_state_dict.update(paddle.load(shard_file, return_numpy=True))

        tp_state_dict_list.append(tp_state_dict)

    return tp_state_dict_list
35+
36+
37+
def merge_tensor_parallel(cls, state_dict_list, config) -> dict:
    """Merge per-rank tensor-parallel state dicts into a single state dict.

    Args:
        cls: model class exposing ``_get_tensor_parallel_mappings`` and
            ``_resolve_prefix_keys`` (e.g. ``LlamaForCausalLM``).
        state_dict_list (list[dict]): one state dict per tensor-parallel rank.
            The keys of the first entry decide which tensors are produced.
        config: model configuration forwarded to
            ``cls._get_tensor_parallel_mappings``.

    Returns:
        dict: maps every key of ``state_dict_list[0]`` either to the result of
        its merge action applied to that key's value from every rank, or, when
        no merge action is registered for the key, to rank 0's value unchanged.
    """
    name_action_mappings = cls._get_tensor_parallel_mappings(config, is_split=False)
    state_keys_map = cls._resolve_prefix_keys(name_action_mappings.keys(), state_dict_list[0].keys())

    # Re-key the merge actions by the names that actually occur in the state dict.
    for k, v in state_keys_map.items():
        name_action_mappings[v] = name_action_mappings.pop(k)

    state_dict_to_save = {}
    for key, tensor in state_dict_list[0].items():
        if key in name_action_mappings:
            # Pop the action so that whatever is left afterwards is exactly the
            # set of actions that never found a matching weight.
            action = name_action_mappings.pop(key)
            tensor = action([x[key] for x in state_dict_list])

        state_dict_to_save[key] = tensor

    # Anything still registered here had no counterpart in the loaded weights.
    for x in name_action_mappings:
        logger.warning(f"key <{x}> need to merge tensor parallel but we can't find in model state.")

    print("Finally, we merging state dict to fellowing tensors.")
    for k, v in state_dict_to_save.items():
        print(k, v.shape, v.dtype)

    return state_dict_to_save
69+
70+
71+
def main(tp_degree=2, pp_degree=2, model_name_or_path="temp_dir_to_your_ckpt"):
    """Merge a tensor- and pipeline-parallel Llama checkpoint into one file.

    Loads the config from ``model_name_or_path``, merges the pipeline-parallel
    shards per tensor-parallel rank, merges the resulting per-rank state dicts,
    and saves the outcome as ``model_state.pdparams`` in the same directory.

    Args:
        tp_degree (int): tensor-parallel degree of the checkpoint; must be > 1.
        pp_degree (int): pipeline-parallel degree of the checkpoint; must be > 1.
        model_name_or_path (str): directory containing the config and the
            sharded ``model_state.tpXX_ppXX.pdparams`` files. The default is a
            placeholder that has to be replaced with a real checkpoint path.

    Raises:
        ValueError: if ``tp_degree`` or ``pp_degree`` is not greater than 1.
    """
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if tp_degree <= 1:
        raise ValueError(f"tp_degree must be greater than 1, got {tp_degree}")
    if pp_degree <= 1:
        raise ValueError(f"pp_degree must be greater than 1, got {pp_degree}")

    config = LlamaConfig.from_pretrained(model_name_or_path)
    cls = LlamaForCausalLM

    tp_state_dict_list = merge_pipeline_parallel(tp_degree, pp_degree, model_name_or_path)
    state_dict_to_save = merge_tensor_parallel(cls=cls, state_dict_list=tp_state_dict_list, config=config)
    print("saving")
    paddle.save(state_dict_to_save, os.path.join(model_name_or_path, "model_state.pdparams"))


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)