pytorch
diff --git a/torchchat/cli/builder.py b/torchchat/cli/builder.py
Lines changed: 7 additions & 4 deletions
diff --git a/torchchat/cli/cli.py b/torchchat/cli/cli.py
Lines changed: 14 additions & 0 deletions
@@ -16,12 +16,9 @@
 import torch._inductor.config
 import torch.nn as nn
 
-from torch.distributed import launcher
-
 from torch.distributed.device_mesh import DeviceMesh
 from torch.distributed.elastic.multiprocessing.errors import record
 from torch.distributed.elastic.utils.distributed import get_free_port
-from torch.distributed.launcher.api import elastic_launch
 
 from torchchat.distributed import launch_distributed, ParallelDims, parallelize_llama
 
@@ -65,6 +62,8 @@ class BuilderArgs:
     num_nodes: int = 1
     pp: int = 1
     tp: int = 1
+    chpt_from: str = "hf"
+    ntokens: int = 40
     is_chat_model: bool = False
     prefill_possible: bool = False
     dynamic_shapes: bool = False
@@ -171,6 +170,8 @@ def from_args(cls, args: argparse.Namespace) -> "BuilderArgs":
         num_nodes = getattr(args, "num_nodes", 1)
         pp = getattr(args, "pp", 1)
         tp = getattr(args, "tp", 1)
+        chpt_from = getattr(args, "chpt_from", "hf")
+        ntokens = getattr(args, "ntokens", 40)
         return cls(
             checkpoint_dir=checkpoint_dir,
             checkpoint_path=checkpoint_path,
@@ -189,6 +190,8 @@ def from_args(cls, args: argparse.Namespace) -> "BuilderArgs":
             num_nodes=num_nodes,
             pp=pp,
             tp=tp,
+            chpt_from=chpt_from,
+            ntokens=ntokens,
             is_chat_model=is_chat_model,
             dynamic_shapes=getattr(args, "dynamic_shapes", False),
             max_seq_length=getattr(args, "max_seq_length", None),
@@ -508,7 +511,7 @@ def _load_model(builder_args: BuilderArgs) -> Model:
 
     model = model.to(device=builder_args.device, dtype=builder_args.precision)
     return model.eval()
-    
+
 
 def _initialize_model(
     builder_args: BuilderArgs,
 
@@ -426,6 +426,20 @@ def _add_distributed_args(parser) -> None:
         # "Tensor parallel degree",
     )
 
+    parser.add_argument(
+        "--ntokens",
+        type=int,
+        default=40,
+        help="Number of tokens to generate",
+    )
+    parser.add_argument(
+        "--chpt-from",
+        type=str,
+        default="hf",  # TODO: change to torchchat once we support it well
+        help="Checkpoint format to load from",
+        choices=["hf", "torchchat"],
+    )
+
 
 # Add CLI Args related to custom model inputs
 def _add_custom_model_args(parser) -> None: