Skip to content

Commit 2a81772

Browse files
for megatron convert
1 parent 7a68a3e commit 2a81772

File tree

2 files changed

+14
-10
lines changed

2 files changed

+14
-10
lines changed

scripts/convert_cogview4_to_diffusers.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,18 @@
5353
# this is specific to `AdaLayerNormContinuous`:
5454
# the diffusers implementation splits the linear projection into scale, shift while CogView4 splits it into shift, scale
5555
def swap_scale_shift(weight, dim):
    """
    Swap the scale and shift components in the weight tensor.

    Args:
        weight (torch.Tensor): The original weight tensor.
        dim (int): The dimension along which to split.

    Returns:
        torch.Tensor: The modified weight tensor with scale and shift swapped.
    """
    # The tensor stores [shift | scale] along `dim`; reorder to [scale | shift].
    halves = weight.chunk(2, dim=dim)
    return torch.cat((halves[1], halves[0]), dim=dim)
5969

6070

scripts/convert_cogview4_to_diffusers_megatron.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -189,14 +189,8 @@ def convert_megatron_transformer_checkpoint_to_diffusers(
189189
block_prefix = f"transformer_blocks.{i}."
190190

191191
# AdaLayerNorm
192-
new_state_dict[block_prefix + "norm1.linear.weight"] = swap_scale_shift(
193-
mega[f"decoder.layers.{i}.adaln.weight"], dim=0
194-
)
195-
new_state_dict[block_prefix + "norm1.linear.bias"] = swap_scale_shift(
196-
mega[f"decoder.layers.{i}.adaln.bias"], dim=0
197-
)
198-
199-
# QKV
192+
new_state_dict[block_prefix + "norm1.linear.weight"] = mega[f"decoder.layers.{i}.adaln.weight"]
193+
new_state_dict[block_prefix + "norm1.linear.bias"] = mega[f"decoder.layers.{i}.adaln.bias"]
200194
qkv_weight = mega[f"decoder.layers.{i}.self_attention.linear_qkv.weight"]
201195
qkv_bias = mega[f"decoder.layers.{i}.self_attention.linear_qkv.bias"]
202196

0 commit comments

Comments
 (0)