2 files changed: +11 -1 lines changed

@@ -812,7 +812,12 @@ def chat(
        elif not generator_args.is_torchtune_model:
            max_seq_length = min(
-                encoded.size(0) + generator_args.max_new_tokens, max_seq_length
+                encoded.size(0) + generator_args.max_new_tokens,
+                (
+                    text_transformer_args.block_size
+                    if text_transformer_args is not None
+                    else 2048
+                ),
            )

        max_seq_length = (
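In effect, the change stops clamping against the incoming max_seq_length and instead caps the prompt-plus-generation budget at the text transformer's context window, falling back to 2048 tokens when no text transformer args are available. A minimal standalone sketch of that logic (the dataclasses below are stand-ins for illustration, not the real project types):

from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class GeneratorArgs:  # stand-in for the real generator args
    max_new_tokens: int = 200


@dataclass
class TextTransformerArgs:  # stand-in; only block_size matters here
    block_size: int = 4096


def compute_max_seq_length(
    encoded: torch.Tensor,
    generator_args: GeneratorArgs,
    text_transformer_args: Optional[TextTransformerArgs],
) -> int:
    # Prompt length plus the requested generation budget, capped at the
    # model's context window; 2048 is the fallback when the model did not
    # register text transformer args.
    return min(
        encoded.size(0) + generator_args.max_new_tokens,
        text_transformer_args.block_size
        if text_transformer_args is not None
        else 2048,
    )


prompt = torch.zeros(100, dtype=torch.long)  # a 100-token prompt
print(compute_max_seq_length(prompt, GeneratorArgs(), TextTransformerArgs()))    # 300
print(compute_max_seq_length(prompt, GeneratorArgs(max_new_tokens=8000), None))  # 2048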
@@ -302,6 +302,8 @@ class ModelArgs:
        transformer_args (Dict[str, Dict[str, Any]]): A dictionary containing the parameters for each transformer in the model.
            The outer dictionary has transformer names as keys and inner dictionaries as values. Each inner dictionary contains
            the parameter names and their corresponding values for the respective transformer.
+            TODO: Reconcile Dict[str, Any] into transformer-arg-family classes in future PRs.
+
        use_tiktoken (bool): A flag indicating whether to use TikToken as the tokenizer for the model.
    Note:
        It is recommended to use factory functions to create instances of this class instead of directly using the constructor.
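For illustration, a transformer_args value of this shape might look as follows for a model with separate text and speech transformers; the keys and parameter names here are hypothetical, not taken from this PR:

transformer_args = {
    "text": {"block_size": 4096, "n_layers": 32, "dim": 4096},
    "speech": {"block_size": 2048, "n_layers": 12, "dim": 1024},
}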
@@ -436,6 +438,9 @@ def __init__(self, config: ModelArgs) -> None:
        super().__init__()
        self.config = config
        self.model = self.build_model()
+
+        # text_transformer_args represents the args for the text transformer in the model.
+        # It should be assigned in the actual model implementation, if any.
        self.text_transformer_args = None

    def build_model(self) -> nn.Module:
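The new comment leaves the assignment to concrete model implementations. A hypothetical sketch of how a text-only model might populate the attribute; every class and key name below is an assumption for illustration, and only text_transformer_args and build_model come from the diff itself:

from types import SimpleNamespace
from typing import Any, Dict

import torch.nn as nn


class DummyTextTransformer(nn.Module):
    # Stand-in for a real text transformer; only block_size is modeled.
    def __init__(self, args: Dict[str, Any]) -> None:
        super().__init__()
        self.block_size = args.get("block_size", 2048)


class TextOnlyModel(nn.Module):
    # Minimal stand-in for the Model class being modified in this diff.
    def __init__(self, transformer_args: Dict[str, Dict[str, Any]]) -> None:
        super().__init__()
        text_args = transformer_args["text"]
        self.model = DummyTextTransformer(text_args)
        # Assigned so generation-time code (e.g. the max_seq_length clamp
        # in chat above) can read block_size via attribute access; a model
        # with no text transformer would leave this as None.
        self.text_transformer_args = SimpleNamespace(**text_args)


model = TextOnlyModel({"text": {"block_size": 4096}})
print(model.text_transformer_args.block_size)  # 4096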