hao-ai-lab
diff --git a/fastvideo/v1/fastvideo_args.py b/fastvideo/v1/fastvideo_args.py
Lines changed: 15 additions & 6 deletions
diff --git a/fastvideo/v1/pipelines/composed_pipeline_base.py b/fastvideo/v1/pipelines/composed_pipeline_base.py
Lines changed: 12 additions & 2 deletions
@@ -34,7 +34,7 @@ class FastVideoArgs:
     # Distributed executor backend
     distributed_executor_backend: str = "mp"
 
-    inference_mode: bool = True  # if False == training mode
+    mode: str = "inference"  # Options: "inference", "training", "distill"
 
     # HuggingFace specific parameters
     trust_remote_code: bool = False
@@ -115,7 +115,15 @@ class FastVideoArgs:
 
     @property
     def training_mode(self) -> bool:
-        return not self.inference_mode
+        return self.mode == "training"
+
+    @property
+    def distill_mode(self) -> bool:
+        return self.mode == "distill"
+
+    @property
+    def inference_mode(self) -> bool:
+        return self.mode == "inference"
 
     def __post_init__(self):
         pass
@@ -150,10 +158,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         )
 
         parser.add_argument(
-            "--inference-mode",
-            action=StoreBoolean,
-            default=FastVideoArgs.inference_mode,
-            help="Whether to use inference mode",
+            "--mode",
+            type=str,
+            default=FastVideoArgs.mode,
+            choices=["inference", "training", "distill"],
+            help="The mode to use",
         )
 
         # HuggingFace specific parameters
 
@@ -94,6 +94,12 @@ def __init__(self,
                 self.initialize_validation_pipeline(self.training_args)
             self.initialize_training_pipeline(self.training_args)
 
+        if fastvideo_args.distill_mode:
+            self.initialize_distillation_pipeline(fastvideo_args)
+
+        if fastvideo_args.log_validation:
+            self.initialize_validation_pipeline(fastvideo_args)
+
         self.initialize_pipeline(fastvideo_args)
 
         if not fastvideo_args.training_mode:
@@ -109,6 +115,10 @@ def initialize_validation_pipeline(self, training_args: TrainingArgs):
             "if log_validation is True, the pipeline must implement this method"
         )
 
+    def initialize_distillation_pipeline(self, fastvideo_args: FastVideoArgs):
+        raise NotImplementedError(
+            "if distill_mode is True, the pipeline must implement this method")
+
     @classmethod
     def from_pretrained(cls,
                         model_path: str,
@@ -148,7 +158,7 @@ def from_pretrained(cls,
             config_args = shallow_asdict(config)
             config_args.update(kwargs)
 
-        if args is None or args.inference_mode:
+        if args.mode == "inference":
             fastvideo_args = FastVideoArgs(model_path=model_path,
                                            device_str=device or "cuda" if
                                            torch.cuda.is_available() else "cpu",
@@ -172,7 +182,7 @@ def from_pretrained(cls,
             fastvideo_args.num_gpus = int(os.environ.get("WORLD_SIZE", 1))
             fastvideo_args.use_cpu_offload = False
             # make sure we are in training mode
-            fastvideo_args.inference_mode = False
+            fastvideo_args.mode = args.mode
             # we hijack the precision to be the master weight type so that the
             # model is loaded with the correct precision. Subsequently we will
             # use FSDP2's MixedPrecisionPolicy to set the precision for the