We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent f888ddc, commit 52e118b (Copy full SHA for 52e118b)
nemo_automodel/components/distributed/parallelizer.py
@@ -1100,7 +1100,7 @@ def megatron_fsdp_strategy_parallelize(
1100
return model, optimizer
1101
1102
# Wrap model with MegatronFSDP.
1103
- fsdp_kwargs = dict(
+ model, optimizer = megatron_fsdp_fully_shard(
1104
module=model,
1105
optimizer=optimizer,
1106
fsdp_unit_modules=megatron_fsdp_unit_modules,
@@ -1122,7 +1122,6 @@ def megatron_fsdp_strategy_parallelize(
1122
fsdp_double_buffer=fsdp_double_buffer,
1123
)
1124
1125
- model, optimizer = megatron_fsdp_fully_shard(**fsdp_kwargs)
1126
1127
1128
0 commit comments