
Commit 6f170a8

[Training] [5/n] Add single gpu training pipeline (hao-ai-lab#447)

Authored by SolitaryThinker, JerryZhou54, kevin314, and BrianChen1129

Co-authored-by: JerryZhou54 <[email protected]>
Co-authored-by: Wei Zhou <[email protected]>
Co-authored-by: Kevin Lin <[email protected]>
Co-authored-by: “BrianChen1129” <[email protected]>

1 parent 5ede860 · commit 6f170a8

File tree

13 files changed: +1138 −61 lines


.github/workflows/pre-commit.yml
Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@ jobs:
     - uses: actions/checkout@v4
     - uses: actions/setup-python@v5
       with:
-        python-version: "3.10"
+        python-version: "3.12"
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
     - uses: pre-commit/[email protected]

.pre-commit-config.yaml
Lines changed: 2 additions & 2 deletions

@@ -33,7 +33,7 @@ repos:
     args: [--in-place, --verbose]
     additional_dependencies: [toml] # TODO: Remove when yapf is upgraded
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.11.4
+  rev: v0.11.12
   hooks:
   - id: ruff
     args: [--output-format, github, --fix]
@@ -48,7 +48,7 @@ repos:
   hooks:
   - id: isort
 - repo: https://github.com/jackdewinter/pymarkdown
-  rev: v0.9.29
+  rev: v0.9.30
   hooks:
   - id: pymarkdown
     args: [fix]

fastvideo/v1/configs/models/vaes/wanvae.py
Lines changed: 1 addition & 1 deletion

@@ -63,7 +63,7 @@ def __post_init__(self):
 
 @dataclass
 class WanVAEConfig(VAEConfig):
-    arch_config: VAEArchConfig = field(default_factory=WanVAEArchConfig)
+    arch_config: WanVAEArchConfig = field(default_factory=WanVAEArchConfig)
     use_feature_cache: bool = True
 
     use_tiling: bool = False
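
The only change here narrows the declared type of arch_config from the base VAEArchConfig to the concrete WanVAEArchConfig, so type checkers can see Wan-specific arch fields without a cast. A minimal sketch of the pattern, with illustrative field names that are not from the repo:

from dataclasses import dataclass, field


@dataclass
class VAEArchConfig:
    latent_channels: int = 4  # illustrative base field


@dataclass
class WanVAEArchConfig(VAEArchConfig):
    z_dim: int = 16  # illustrative Wan-specific field


@dataclass
class VAEConfig:
    arch_config: VAEArchConfig = field(default_factory=VAEArchConfig)


@dataclass
class WanVAEConfig(VAEConfig):
    # Narrowed annotation: the checker now sees Wan-specific fields directly.
    arch_config: WanVAEArchConfig = field(default_factory=WanVAEArchConfig)


cfg = WanVAEConfig()
print(cfg.arch_config.z_dim)  # type-checks without cast(WanVAEArchConfig, ...)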

fastvideo/v1/distributed/parallel_state.py
Lines changed: 1 addition & 1 deletion

@@ -655,7 +655,7 @@ def recv_tensor_dict(
             tensor_dict[key] = value
         return tensor_dict
 
-    def barrier(self):
+    def barrier(self) -> None:
         """Barrier synchronization among the group.
         NOTE: don't use `device_group` here! `barrier` in NCCL is
         terrible because it is internally a broadcast operation with

fastvideo/v1/fastvideo_args.py
Lines changed: 1 addition & 0 deletions

@@ -478,6 +478,7 @@ class TrainingArgs(FastVideoArgs):
     output_dir: str = ""
     checkpoints_total_limit: int = 0
     checkpointing_steps: int = 0
+    resume_from_checkpoint: bool = False
     logging_dir: str = ""
 
     # optimizer & scheduler
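
resume_from_checkpoint joins the existing checkpointing knobs on TrainingArgs. The consuming code is not part of this diff; below is a plausible single-GPU resume helper under that caveat. The checkpoint directory layout, the state.pt filename, and the state-dict keys are assumptions, not FastVideo APIs:

import os

import torch


def maybe_resume(training_args, transformer, optimizer) -> int:
    """Hypothetical helper: return the step to resume from, or 0."""
    if not training_args.resume_from_checkpoint:
        return 0
    ckpts = [
        d for d in os.listdir(training_args.output_dir)
        if d.startswith("checkpoint-")
    ]
    if not ckpts:
        return 0  # nothing saved yet; start from scratch
    # Pick the highest step numerically, not lexicographically.
    latest = max(ckpts, key=lambda d: int(d.split("-")[-1]))
    state = torch.load(
        os.path.join(training_args.output_dir, latest, "state.pt"))
    transformer.load_state_dict(state["model"])
    optimizer.load_state_dict(state["optimizer"])
    return state["step"]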

fastvideo/v1/pipelines/composed_pipeline_base.py
Lines changed: 150 additions & 14 deletions

@@ -5,19 +5,25 @@
 This module defines the base class for pipelines that are composed of multiple stages.
 """
 
+import argparse
 import os
 from abc import ABC, abstractmethod
 from copy import deepcopy
-from typing import Any, Dict, List, Optional, cast
+from typing import Any, Dict, List, Optional, Union, cast
 
 import torch
 
-from fastvideo.v1.fastvideo_args import FastVideoArgs
+from fastvideo.v1.configs.pipelines import (PipelineConfig,
+                                            get_pipeline_config_cls_for_name)
+from fastvideo.v1.distributed import (init_distributed_environment,
+                                      initialize_model_parallel,
+                                      model_parallel_is_initialized)
+from fastvideo.v1.fastvideo_args import FastVideoArgs, TrainingArgs
 from fastvideo.v1.logger import init_logger
 from fastvideo.v1.models.loader.component_loader import PipelineComponentLoader
 from fastvideo.v1.pipelines.pipeline_batch_info import ForwardBatch
 from fastvideo.v1.pipelines.stages import PipelineStage
-from fastvideo.v1.utils import (maybe_download_model,
+from fastvideo.v1.utils import (maybe_download_model, shallow_asdict,
                                 verify_model_config_and_directory)
 
 logger = init_logger(__name__)
@@ -34,20 +40,35 @@ class ComposedPipelineBase(ABC):
 
     is_video_pipeline: bool = False  # To be overridden by video pipelines
     _required_config_modules: List[str] = []
+    training_args: Optional[TrainingArgs] = None
+    fastvideo_args: Optional[FastVideoArgs] = None
 
     # TODO(will): args should support both inference args and training args
     def __init__(self,
                  model_path: str,
                  fastvideo_args: FastVideoArgs,
-                 config: Optional[Dict[str, Any]] = None):
+                 config: Optional[Dict[str, Any]] = None,
+                 required_config_modules: Optional[List[str]] = None):
         """
         Initialize the pipeline. After __init__, the pipeline should be ready to
        use. The pipeline should be stateless and not hold any batch state.
        """
+
+        if fastvideo_args.training_mode:
+            assert isinstance(fastvideo_args, TrainingArgs)
+            self.training_args = fastvideo_args
+            assert self.training_args is not None
+        else:
+            self.fastvideo_args = fastvideo_args
+            assert self.fastvideo_args is not None
+
         self.model_path = model_path
         self._stages: List[PipelineStage] = []
         self._stage_name_mapping: Dict[str, PipelineStage] = {}
 
+        if required_config_modules is not None:
+            self._required_config_modules = required_config_modules
+
         if self._required_config_modules is None:
             raise NotImplementedError(
                 "Subclass must set _required_config_modules")
@@ -59,16 +80,124 @@ def __init__(self,
         else:
             self.config = config
 
+        self.maybe_init_distributed_environment(fastvideo_args)
+
         # Load modules directly in initialization
         logger.info("Loading pipeline modules...")
         self.modules = self.load_modules(fastvideo_args)
 
+        if fastvideo_args.training_mode:
+            assert self.training_args is not None
+            if self.training_args.log_validation:
+                self.initialize_validation_pipeline(self.training_args)
+            self.initialize_training_pipeline(self.training_args)
+
         self.initialize_pipeline(fastvideo_args)
 
-        logger.info("Creating pipeline stages...")
-        self.create_pipeline_stages(fastvideo_args)
+        if not fastvideo_args.training_mode:
+            logger.info("Creating pipeline stages...")
+            self.create_pipeline_stages(fastvideo_args)
+
+    def initialize_training_pipeline(self, training_args: TrainingArgs):
+        raise NotImplementedError(
+            "if training_mode is True, the pipeline must implement this method")
+
+    def initialize_validation_pipeline(self, training_args: TrainingArgs):
+        raise NotImplementedError(
+            "if log_validation is True, the pipeline must implement this method"
+        )
+
+    @classmethod
+    def from_pretrained(cls,
+                        model_path: str,
+                        device: Optional[str] = None,
+                        torch_dtype: Optional[torch.dtype] = None,
+                        pipeline_config: Optional[
+                            Union[str
+                                  | PipelineConfig]] = None,
+                        args: Optional[argparse.Namespace] = None,
+                        required_config_modules: Optional[List[str]] = None,
+                        **kwargs) -> "ComposedPipelineBase":
+        config = None
+        # 1. If users provide a pipeline config, it will override the default pipeline config
+        if isinstance(pipeline_config, PipelineConfig):
+            config = pipeline_config
+        else:
+            config_cls = get_pipeline_config_cls_for_name(model_path)
+            if config_cls is not None:
+                config = config_cls()
+            if isinstance(pipeline_config, str):
+                config.load_from_json(pipeline_config)
+
+        # 2. If users also provide some kwargs, it will override the pipeline config.
+        # The user kwargs shouldn't contain model config parameters!
+        if config is None:
+            logger.warning("No config found for model %s, using default config",
+                           model_path)
+            config_args = kwargs
+        else:
+            config_args = shallow_asdict(config)
+            config_args.update(kwargs)
+
+        if args is None or args.inference_mode:
+            fastvideo_args = FastVideoArgs(model_path=model_path,
+                                           device_str=device or "cuda" if
+                                           torch.cuda.is_available() else "cpu",
+                                           **config_args)
+
+            fastvideo_args.model_path = model_path
+            fastvideo_args.device_str = device or "cuda" if torch.cuda.is_available(
+            ) else "cpu"
+            for key, value in config_args.items():
+                setattr(fastvideo_args, key, value)
+        else:
+            assert args is not None, "args must be provided for training mode"
+            fastvideo_args = TrainingArgs.from_cli_args(args)
+            # TODO(will): fix this so that its not so ugly
+            fastvideo_args.model_path = model_path
+            fastvideo_args.device_str = device or "cuda" if torch.cuda.is_available(
+            ) else "cpu"
+            for key, value in config_args.items():
+                setattr(fastvideo_args, key, value)
+
+            fastvideo_args.use_cpu_offload = False
+            fastvideo_args.inference_mode = False
+
+        logger.info("fastvideo_args in from_pretrained: %s", fastvideo_args)
+
+        fastvideo_args.check_fastvideo_args()
+
+        return cls(model_path,
+                   fastvideo_args,
+                   required_config_modules=required_config_modules)
+
+    def maybe_init_distributed_environment(self, fastvideo_args: FastVideoArgs):
+        if model_parallel_is_initialized():
+            return
+        local_rank = int(os.environ.get("LOCAL_RANK", -1))
+        world_size = int(os.environ.get("WORLD_SIZE", -1))
+        rank = int(os.environ.get("RANK", -1))
+
+        if local_rank == -1 or world_size == -1 or rank == -1:
+            raise ValueError(
+                "Local rank, world size, and rank must be set. Use torchrun to launch the script."
+            )
 
-    def get_module(self, module_name: str) -> Any:
+        torch.cuda.set_device(local_rank)
+        init_distributed_environment(world_size=world_size,
+                                     rank=rank,
+                                     local_rank=local_rank)
+        assert fastvideo_args.tp_size is not None, "tp_size must be set"
+        assert fastvideo_args.sp_size is not None, "sp_size must be set"
+        initialize_model_parallel(
+            tensor_model_parallel_size=fastvideo_args.tp_size,
+            sequence_model_parallel_size=fastvideo_args.sp_size)
+        device = torch.device(f"cuda:{local_rank}")
+        fastvideo_args.device = device
+
+    def get_module(self, module_name: str, default_value: Any = None) -> Any:
+        if module_name not in self.modules:
+            return default_value
         return self.modules[module_name]
 
     def add_module(self, module_name: str, module: Any):
@@ -114,6 +243,12 @@ def create_pipeline_stages(self, fastvideo_args: FastVideoArgs):
         """
         raise NotImplementedError
 
+    def create_training_stages(self, training_args: TrainingArgs):
+        """
+        Create the training pipeline stages.
+        """
+        raise NotImplementedError
+
     def initialize_pipeline(self, fastvideo_args: FastVideoArgs):
         """
         Initialize the pipeline.
@@ -136,19 +271,21 @@ def load_modules(self, fastvideo_args: FastVideoArgs) -> Dict[str, Any]:
             modules_config
         ) > 1, "model_index.json must contain at least one pipeline module"
 
-        required_modules = [
-            "vae", "text_encoder", "transformer", "scheduler", "tokenizer"
-        ]
-        for module_name in required_modules:
+        for module_name in self.required_config_modules:
             if module_name not in modules_config:
                 raise ValueError(
                     f"model_index.json must contain a {module_name} module")
-        logger.info("Diffusers config passed sanity checks")
 
         # all the component models used by the pipeline
+        required_modules = self.required_config_modules
+        logger.info("Loading required modules: %s", required_modules)
+
         modules = {}
         for module_name, (transformers_or_diffusers,
                           architecture) in modules_config.items():
+            if module_name not in required_modules:
+                logger.info("Skipping module %s", module_name)
+                continue
             component_model_path = os.path.join(self.model_path, module_name)
             module = PipelineComponentLoader.load_module(
                 module_name=module_name,
@@ -164,7 +301,6 @@ def load_modules(self, fastvideo_args: FastVideoArgs) -> Dict[str, Any]:
             logger.warning("Overwriting module %s", module_name)
             modules[module_name] = module
 
-        required_modules = self.required_config_modules
         # Check if all required modules were loaded
        for module_name in required_modules:
            if module_name not in modules or modules[module_name] is None:
@@ -198,7 +334,7 @@ def forward(
         # Execute each stage
         logger.info("Running pipeline stages: %s",
                     self._stage_name_mapping.keys())
-        logger.info("Batch: %s", batch)
+        # logger.info("Batch: %s", batch)
         for stage in self.stages:
             batch = stage(batch, fastvideo_args)
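
Two practical consequences of the new base-class code: maybe_init_distributed_environment insists on RANK, LOCAL_RANK, and WORLD_SIZE, so even a single-GPU job must be launched with torchrun; and get_module now takes a default instead of raising on a missing key. A sketch of the resulting call pattern, using the WanValidationPipeline added below in wan_pipeline.py; the model directory is a placeholder:

# Launch with: torchrun --nproc_per_node=1 run_pipeline.py
# torchrun exports RANK=0, LOCAL_RANK=0, WORLD_SIZE=1, which
# maybe_init_distributed_environment requires even on a single GPU.
from fastvideo.v1.pipelines.wan.wan_pipeline import WanValidationPipeline

pipeline = WanValidationPipeline.from_pretrained(
    "/path/to/Wan2.1-Diffusers",  # placeholder local model directory
    device="cuda",
)

# get_module with a default: "transformer" is not in this pipeline's
# _required_config_modules, so load_modules skipped it and None comes back
# instead of a KeyError.
transformer = pipeline.get_module("transformer", None)
assert transformer is None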

fastvideo/v1/pipelines/training_utils.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

fastvideo/v1/pipelines/wan/wan_pipeline.py
Lines changed: 27 additions & 1 deletion

@@ -48,7 +48,33 @@ def create_pipeline_stages(self, fastvideo_args: FastVideoArgs):
         self.add_stage(stage_name="latent_preparation_stage",
                        stage=LatentPreparationStage(
                            scheduler=self.get_module("scheduler"),
-                           transformer=self.get_module("transformer")))
+                           transformer=self.get_module("transformer", None)))
+
+        self.add_stage(stage_name="denoising_stage",
+                       stage=DenoisingStage(
+                           transformer=self.get_module("transformer"),
+                           scheduler=self.get_module("scheduler")))
+
+        self.add_stage(stage_name="decoding_stage",
+                       stage=DecodingStage(vae=self.get_module("vae")))
+
+
+class WanValidationPipeline(ComposedPipelineBase):
+    """
+    Validation pipeline for Wan2.1; assumes the inputs are preprocessed latents.
+    """
+    _required_config_modules = ["vae", "scheduler"]
+
+    def create_pipeline_stages(self, fastvideo_args: FastVideoArgs):
+        """Set up pipeline stages with proper dependency injection."""
+        self.add_stage(stage_name="timestep_preparation_stage",
+                       stage=TimestepPreparationStage(
+                           scheduler=self.get_module("scheduler")))
+
+        self.add_stage(stage_name="latent_preparation_stage",
+                       stage=LatentPreparationStage(
+                           scheduler=self.get_module("scheduler"),
+                           transformer=self.get_module("transformer", None)))
 
         self.add_stage(stage_name="denoising_stage",
                        stage=DenoisingStage(
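
Since load_modules now skips anything outside required_config_modules, WanValidationPipeline touches only the VAE and scheduler, and the get_module("transformer", None) default keeps LatentPreparationStage constructible without a transformer. The per-instance override added to the base-class constructor composes with this; a sketch under the assumption that the extra module exists in the model's model_index.json (path is a placeholder):

from fastvideo.v1.pipelines.wan.wan_pipeline import WanValidationPipeline

# Per-instance override of the module list: the new required_config_modules
# argument replaces the class-level _required_config_modules, so this
# instance additionally loads the text encoder alongside vae and scheduler.
pipeline = WanValidationPipeline.from_pretrained(
    "/path/to/Wan2.1-Diffusers",  # placeholder local model directory
    required_config_modules=["vae", "scheduler", "text_encoder"],
)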

fastvideo/v1/training/__init__.py

Whitespace-only changes.
