Skip to content

Commit 59cea97

Browse files
committed
update
1 parent 3f1787f commit 59cea97

File tree

1 file changed

+139
-0
lines changed

1 file changed

+139
-0
lines changed

paddlenlp/utils/memory_count.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from paddlenlp.transformers import AutoConfig, PretrainedConfig
16+
17+
18+
def activate_memory(config: PretrainedConfig, B=1, S=1024):
    """Estimate the activation memory (in GiB) of one training step.

    Args:
        config (PretrainedConfig): model configuration supplying the
            transformer hyper-parameters.
        B (int, optional): batch size. Defaults to 1.
        S (int, optional): sequence length. Defaults to 1024.

    Returns:
        float: estimated activation memory in GiB.
    """
    hidden = config.hidden_size
    inter = config.intermediate_size
    layers = config.num_hidden_layers
    heads = config.num_attention_heads
    # Grouped-query attention ratio: query heads per key/value head.
    group = heads / config.num_key_value_heads

    # Per-layer activation element count (heuristic formula).
    per_layer = (
        (32 + 8 / group) * B * S * hidden
        + 8 * B * S * inter
        + 8 * B * S
        + 4 * B * heads * S
    )
    # x2 — presumably bytes per fp16/bf16 activation value; kept as in the
    # original heuristic.
    total_bytes = layers * per_layer * 2
    return total_bytes / pow(2, 30)
29+
30+
31+
def sft_memory(config: PretrainedConfig, return_base_model_state=False):
    """Estimate full-parameter SFT training memory (in GiB).

    Counts the parameters of a LLaMA-style transformer (two embedding
    tables, per-layer layernorms, GQA attention projections and a gated
    3-matrix MLP), then applies the per-parameter byte cost of mixed
    precision Adam training.

    Args:
        config (PretrainedConfig): model configuration.
        return_base_model_state (bool, optional): when True, return the raw
            parameter count instead of the GiB estimate. Defaults to False.

    Returns:
        float: parameter count if ``return_base_model_state`` else memory in GiB.
    """
    hidden = config.hidden_size
    inter = config.intermediate_size
    layers = config.num_hidden_layers
    # Grouped-query attention ratio: query heads per key/value head.
    group = config.num_attention_heads / config.num_key_value_heads
    vocab = config.vocab_size

    # Per layer: 2 layernorm vectors, q/o projections plus grouped k/v
    # projections, and 3 gated-MLP matrices.
    per_layer = 2 * hidden + (2 + 2 / group) * hidden * hidden + 3 * hidden * inter
    base_model_state = 2 * vocab * hidden + layers * per_layer

    if return_base_model_state:
        return base_model_state

    # bf16/fp16 weights (2B) + grads (2B) + fp32 Adam first/second moments
    # (4B each) + fp32 master weights (4B) = 16 bytes per parameter.
    bytes_per_param = 2 + 2 + 4 + 4 + 4
    return base_model_state * bytes_per_param / pow(2, 30)
56+
57+
58+
def lora_memory(config: PretrainedConfig, R=128):
    """Estimate LoRA fine-tuning memory (in GiB).

    The frozen base model is held in bf16/fp16; gradients and Adam
    optimizer states exist only for the (much smaller) LoRA parameters.

    Args:
        config (PretrainedConfig): model configuration.
        R (int, optional): LoRA rank. Defaults to 128.

    Returns:
        float: estimated training memory in GiB.
    """
    hidden = config.hidden_size
    inter = config.intermediate_size
    layers = config.num_hidden_layers
    # Grouped-query attention ratio: query heads per key/value head.
    group = config.num_attention_heads / config.num_key_value_heads

    # Full-model parameter count (the frozen weights).
    base_model_state = sft_memory(config, return_base_model_state=True)

    # LoRA parameter count: each projection gains an (in x R) and an
    # (R x out) adapter pair; layernorm vectors counted as in sft_memory.
    base_lora_state = layers * (
        2 * hidden
        + (2 + 2 / group) * (hidden * R + R * hidden)
        + 3 * (hidden * R + R * inter)
    )

    total = (
        base_model_state * 2  # frozen fp16/bf16 base weights
        + base_lora_state * (2 + 2 + 4 + 4 + 4)  # lora weights, grads, Adam states, master weights
    )
    return total / pow(2, 30)
91+
92+
93+
def qlora_memory(config: PretrainedConfig, R=128, algorithm="weight_only_int8"):
    """Estimate QLoRA fine-tuning memory (in GiB).

    Args:
        config (PretrainedConfig): model configuration.
        R (int, optional): LoRA rank. Defaults to 128.
        algorithm (str, optional): weight quantization algorithm, one of
            "fp4", "nf4" or "weight_only_int8". Defaults to "weight_only_int8".

    Returns:
        float: estimated training memory in GiB.

    Raises:
        ValueError: if ``algorithm`` is not one of the supported values.
    """
    H = config.hidden_size
    H_ = config.intermediate_size
    L = config.num_hidden_layers
    num_attention_heads = config.num_attention_heads
    num_kv_heads = config.num_key_value_heads
    # Grouped-query attention ratio: query heads per key/value head.
    G = num_attention_heads / num_kv_heads

    # Full-model parameter count.
    base_model_state = sft_memory(config, return_base_model_state=True)

    # LoRA parameter count (same expression as in lora_memory).
    base_lora_state = L * (
        2 * H + (2 + 2 / G) * (H * R + R * H) + 3 * (H * R + R * H_)  # layernorm # attention projection
    )  # mlp

    model_state = (
        base_model_state * 2  # model parameters fp16 or bf16
        # + base_lora_state * 2  # lora parameters (disabled in the original; kept for reference)
        + base_lora_state * 2  # model grad
        + base_lora_state * 4  # optimizer 1-order momentum fp32
        + base_lora_state * 4  # optimizer 2-order momentum fp32
        + base_lora_state * 4  # optimizer master weight fp32
    )

    # Quantized-weight overhead: bytes per parameter for the chosen algorithm.
    # NOTE(review): this scales base_lora_state, but quantization normally
    # applies to the *base* model weights (base_model_state) — confirm intent
    # before relying on these numbers.
    if algorithm in ("fp4", "nf4"):
        model_state += base_lora_state * 0.5
    elif algorithm == "weight_only_int8":
        model_state += base_lora_state * 1.0
    else:
        # Previously an unknown algorithm was silently ignored, which
        # understated the estimate; fail loudly instead.
        raise ValueError(f"unknown quantization algorithm: {algorithm!r}")

    return model_state / pow(2, 30)
132+
133+
134+
if __name__ == "__main__":
    # Guarded so that importing this module has no side effects: the original
    # ran this at import time, downloading the remote config on every import.
    config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3.1-70B")
    sft_model_size = sft_memory(config)
    lora_model_size = lora_memory(config, R=128)
    qlora_model_size = qlora_memory(config, R=128, algorithm="weight_only_int8")
    activate_memory_size = activate_memory(config, B=1, S=512)
    print("SFT Model Size:", f"{sft_model_size:.4f}GB")
    # The original computed the three values below but never reported them.
    print("LoRA Model Size:", f"{lora_model_size:.4f}GB")
    print("QLoRA Model Size:", f"{qlora_model_size:.4f}GB")
    print("Activation Memory:", f"{activate_memory_size:.4f}GB")

0 commit comments

Comments
 (0)