[ckpt] fix: Add missing broadcast_model_weights_from_rank0 option for build_parallelize_model() (#548)

Pqlet · web-flow · commit 083873cd269d · 2026-03-10T14:40:50.000-07:00
diff --git a/docs/usage/support_new_models/guide_and_checklist.md b/docs/usage/support_new_models/guide_and_checklist.md
@@ -3,7 +3,7 @@
 **TLDR:** VeOmni patches HuggingFace models at runtime to add FSDP, Sequence Parallelism (SP), Expert Parallelism (EP), and fused kernels. This guide walks you through the integration steps with checklists per model type. For worked examples, see:
 - [qwen3_vl_example.md](./qwen3_vl_example.md) — VLM + MoE (image/video, deepstack, EP)
 - [qwen3_omni_moe_example.md](./qwen3_omni_moe_example.md) — Omni-modal MoE (image/video/audio, talker)
- 
+
 > **Scope note:** This guide currently targets the **transformers v4** integration/patchgen flow.
 > **TODO:** Add a dedicated **transformers v5** section, since modeling code patchgen requires a slightly different approach.
 
diff --git a/veomni/arguments/arguments_types.py b/veomni/arguments/arguments_types.py
@@ -467,6 +467,13 @@ def _validate_accelerator(self):
         )
         if acc.fsdp_config.fsdp_mode == "fsdp2":
             assert self.init_device == "meta", "Please use init_device: meta for FSDP2 training"
+        else:
+            if self.broadcast_model_weights_from_rank0:
+                logger.warning_rank0(
+                    "Ignoring train.broadcast_model_weights_from_rank0=True because it is only "
+                    "used with train.accelerator.fsdp_config.fsdp_mode='fsdp2'. "
+                    f"Received fsdp_mode={acc.fsdp_config.fsdp_mode!r}. Disable this flag or switch to fsdp2.",
+                )
 
     def _derive_batch_config(self):
         acc = self.accelerator
diff --git a/veomni/trainer/base.py b/veomni/trainer/base.py
@@ -325,6 +325,7 @@ def _build_parallelized_model(self):
             ),
             enable_reentrant=args.train.gradient_checkpointing.enable_reentrant,
             enable_forward_prefetch=args.train.accelerator.fsdp_config.forward_prefetch,
+            broadcast_model_weights_from_rank0=args.train.broadcast_model_weights_from_rank0,
         )
         self.model.train()
 

Original file line number	Diff line number	Diff line change
`@@ -325,6 +325,7 @@ def _build_parallelized_model(self):`
`325`	`325`	`),`
`326`	`326`	`enable_reentrant=args.train.gradient_checkpointing.enable_reentrant,`
`327`	`327`	`enable_forward_prefetch=args.train.accelerator.fsdp_config.forward_prefetch,`
	`328`	`+ broadcast_model_weights_from_rank0=args.train.broadcast_model_weights_from_rank0,`
`328`	`329`	`)`
`329`	`330`	`self.model.train()`
`330`	`331`