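Merge main and address review comments: remove leftover DEBUGGING markers and commented-out debug code, and clarify inference log messages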

Huy Vu2 · Huy Vu2 · commit a147258508fa · 2025-11-06T00:01:47.000-08:00
diff --git a/dfm/src/megatron/model/wan/wan_model.py b/dfm/src/megatron/model/wan/wan_model.py
@@ -290,12 +290,6 @@ def sharded_state_dict(
         """
         sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)
 
-        # DEBUGGING
-        # for module in ["t_embedder"]:
-        #     for param_name, param in getattr(self, module).named_parameters():
-        #         weight_key = f"{prefix}{module}.{param_name}"
-        #         self._set_embedder_weights_replica_id(param, sharded_state_dict, weight_key)
-        # DEBUGGING
         # Ensure replica ids for non-transformer embedder weights include pipeline dimension
         for module in ["text_embedding", "time_embedding", "time_projection"]:
             if hasattr(self, module):
diff --git a/dfm/src/megatron/model/wan/wan_step.py b/dfm/src/megatron/model/wan/wan_step.py
@@ -95,7 +95,6 @@ def __call__(
             else:
                 output_tensor = self.diffusion_pipeline.training_step(model, batch)
 
-        # DEBUGGING
         # TODO: do we need to gather output with sequence or context parallelism here
         #       especially when we have pipeline parallelism
 
diff --git a/examples/megatron/recipes/wan/inference_wan.py b/examples/megatron/recipes/wan/inference_wan.py
@@ -230,13 +230,12 @@ def generate(args):
             pipeline_dtype=torch.float32,
         )
 
-        # DEBUGGING
         rank = dist.get_rank()
         if rank == 0:
-            print("tensor_parallel_size:", args.tensor_parallel_size)
-            print("context_parallel_size:", args.context_parallel_size)
-            print("pipeline_parallel_size:", args.pipeline_parallel_size)
-            print("sequence_parallel:", args.sequence_parallel)
+            print("Running inference with tensor_parallel_size:", args.tensor_parallel_size)
+            print("Running inference with context_parallel_size:", args.context_parallel_size)
+            print("Running inference with pipeline_parallel_size:", args.pipeline_parallel_size)
+            print("Running inference with sequence_parallel:", args.sequence_parallel)
             print("\n\n\n")
 
         logging.info("Generating videos ...")