
Commit 4b666a7 (parent fff8647)

update transformer config

5 files changed: +6 −6 lines changed
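All five hunks make the same mechanical change: the text transformer's config, previously reached as model.config.transformer_args["text"], is now read directly from model.model.config. A minimal sketch of the two access paths, using SimpleNamespace as a hypothetical stand-in for torchchat's wrapper classes:

from types import SimpleNamespace

# Hypothetical stand-ins for torchchat's model wrappers; field values are illustrative.
text_config = SimpleNamespace(max_seq_length=2048, block_size=2048, n_local_heads=32)

# Old access path: per-modality dict on the outer wrapper's config.
old_model = SimpleNamespace(config=SimpleNamespace(transformer_args={"text": text_config}))

# New access path: the inner transformer exposes its config directly.
new_model = SimpleNamespace(model=SimpleNamespace(config=text_config))

assert (old_model.config.transformer_args["text"].max_seq_length
        == new_model.model.config.max_seq_length)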

distributed/parallelize_llama.py
Lines changed: 1 addition & 1 deletion

@@ -62,7 +62,7 @@ def apply_tp(
     # after we apply TP to the model. Because we don't want to change model code
     # when applying TP. We need to have change to ensure KVCache has the correct
     # size as k and v.
-    model.config.transformer_args["text"].n_local_heads = model.config.transformer_args["text"].n_local_heads // tp_mesh.size()
+    model.model.config.n_local_heads = model.model.config.n_local_heads // tp_mesh.size()

     # Apply tensor parallelism to every transformer block
     for transformer_block in model.layers:
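For context, the division mirrors how tensor parallelism shards attention heads across ranks: each rank keeps n_heads // world_size heads, so the per-rank KV cache must be sized for the local count rather than the full head count. A worked sketch of the arithmetic, with hypothetical values (n_heads, head_dim, and the 4-way mesh are illustrative, not taken from the commit):

# Hypothetical numbers illustrating the n_local_heads adjustment above.
n_heads = 32        # total attention heads in the model (assumed)
head_dim = 128      # per-head dimension (assumed)
tp_world_size = 4   # size of the tensor-parallel mesh (assumed)

# After TP, each rank owns only a slice of the heads.
n_local_heads = n_heads // tp_world_size  # 8 heads per rank

# The per-rank KV cache is therefore allocated for the local heads only:
# (batch, n_local_heads, max_seq_length, head_dim) instead of the full n_heads.
max_batch_size, max_seq_length = 1, 2048
kv_cache_shape = (max_batch_size, n_local_heads, max_seq_length, head_dim)
print(kv_cache_shape)  # (1, 8, 2048, 128)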

torchchat/cli/builder.py
Lines changed: 1 addition & 1 deletion

@@ -563,7 +563,7 @@ def _initialize_model(
     model.setup_caches(
         max_batch_size=1,
         max_seq_length=max_seq_length
-        or model.config.transformer_args["text"].max_seq_length,
+        or model.model.config.max_seq_length,
     )

     model.to(dtype=builder_args.precision)
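The `max_seq_length or ...` idiom falls back to the model's configured maximum whenever the caller passes a falsy value (None or 0). A minimal sketch of the pattern, with a hypothetical config object standing in for model.model.config:

# Hypothetical stand-in for model.model.config; names are illustrative.
class Config:
    max_seq_length = 2048

def resolve_seq_length(max_seq_length, config):
    # `or` selects the config default whenever the argument is falsy,
    # matching the fallback passed to setup_caches above.
    return max_seq_length or config.max_seq_length

print(resolve_seq_length(None, Config()))  # 2048 (falls back to config)
print(resolve_seq_length(512, Config()))   # 512  (explicit value wins)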

torchchat/export.py
Lines changed: 1 addition & 1 deletion

@@ -54,7 +54,7 @@ def export_for_server(
             torch.tensor([0, 1, 2, 3, 4], dtype=torch.int, device=device),
         )

-        seq = Dim("seq", min=1, max=model.config.transformer_args["text"].max_seq_length)
+        seq = Dim("seq", min=1, max=model.model.config.max_seq_length)
         # Specify that the first dimension of each input is that batch size
         dynamic_shapes = {"tokens": {1: seq}, "input_pos": {0: seq}}
     else:
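Dim comes from torch.export and declares a symbolic dimension that the exported program may vary between min and max at runtime. A minimal sketch of declaring a dynamic sequence dimension this way; the toy module and the 2048 bound are illustrative, not torchchat's actual export path:

import torch
from torch.export import Dim, export

class Toy(torch.nn.Module):
    def forward(self, tokens):
        return tokens * 2

# Symbolic sequence length, bounded by an assumed max_seq_length of 2048.
seq = Dim("seq", min=1, max=2048)

# Dimension 1 of `tokens` may vary at runtime; dimension 0 stays static.
ep = export(
    Toy(),
    (torch.zeros(1, 8, dtype=torch.int),),
    dynamic_shapes={"tokens": {1: seq}},
)
print(ep)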

torchchat/usages/eval.py
Lines changed: 1 addition & 1 deletion

@@ -59,7 +59,7 @@ def setup_cache_padded_seq_input_pos_max_seq_length_for_prefill(
     T = prompt.size(0)
     T_new = T + max_new_tokens
     if max_seq_length is None:
-        max_seq_length = min(T_new, model.config.transformer_args["text"].block_size)
+        max_seq_length = min(T_new, model.model.config.block_size)

     device, dtype = prompt.device, prompt.dtype
     # create an empty tensor of the expected final shape and
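The min() clamp sizes the cache for the prompt plus the requested new tokens, but never beyond the model's context window. A worked example with illustrative numbers (block_size mirrors model.model.config.block_size):

# Illustrative numbers only.
block_size = 2048          # model context window (assumed)
T = 1900                   # prompt length
max_new_tokens = 300

T_new = T + max_new_tokens               # 2200 tokens requested in total
max_seq_length = min(T_new, block_size)
print(max_seq_length)                    # 2048: clamped to the context window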

torchchat/usages/openai_api.py
Lines changed: 2 additions & 2 deletions

@@ -233,11 +233,11 @@ def __init__(self, *args, **kwargs):

         super().__init__(*args, **kwargs)
         self.max_seq_length = (
-            self.model.config.transformer_args["text"].max_seq_length
+            self.model.model.config.max_seq_length
             + self.speculative_builder_args.speculate_k
             + 1
             if self.draft_model is not None
-            else self.model.config.transformer_args["text"].max_seq_length
+            else self.model.model.config.max_seq_length
         )
         # The System fingerprint is a unique identifier for the model and its configuration.
         self.system_fingerprint = (
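When a draft model is present, the limit is padded by speculate_k + 1: each speculative step can add up to speculate_k drafted tokens plus one token verified by the target model. A worked sketch of the same conditional with illustrative values (the base limit and speculate_k are assumptions, not taken from the commit):

# Illustrative values; base_max_seq_length mirrors self.model.model.config.max_seq_length.
base_max_seq_length = 2048   # target model's configured limit (assumed)
speculate_k = 5              # drafted tokens per speculative step (assumed)
draft_model = object()       # stand-in: any non-None value enables speculation

max_seq_length = (
    base_max_seq_length + speculate_k + 1  # headroom for k drafts + 1 verified token
    if draft_model is not None
    else base_max_seq_length
)
print(max_seq_length)  # 2054 with speculation, 2048 without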
