5/n torchchat init

Gasoonjia · Gasoonjia · commit 43dfdc71e49b · 2024-09-16T17:54:38.000-07:00
diff --git a/torchchat/model.py b/torchchat/model.py
@@ -438,7 +438,7 @@ def build_model(self) -> nn.Module:
         return recipe.fusion_class(**modules)
     
     def _replace_know_params(self, params):
-        patterns = {"QuickGELUActivation()": QuickGELUActivation(), "False": False, "True": True}
+        patterns = {"QuickGELUActivation()": QuickGELUActivation()}
         for key, value in params.items():
             if isinstance(value, Hashable) and value in patterns:
                 params[key] = patterns[value]
diff --git a/torchchat/model_params/llava-1.5.json b/torchchat/model_params/llava-1.5.json
@@ -1,5 +1,6 @@
 {
     "model_type": "llava",
+    "use_tiktoken": true,
     "encoder": {
         "tile_size": 336,
         "patch_size": 14,
@@ -9,7 +10,7 @@
         "out_indices": [
             23
         ],
-        "output_cls_projection": "False",
+        "output_cls_projection": false,
         "max_num_tiles": 1,
         "in_channels": 3,
         "intermediate_act": "QuickGELUActivation()"

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"model_type": "llava",`
	`3`	`+ "use_tiktoken": true,`
`3`	`4`	`"encoder": {`
`4`	`5`	`"tile_size": 336,`
`5`	`6`	`"patch_size": 14,`
`@@ -9,7 +10,7 @@`
`9`	`10`	`"out_indices": [`
`10`	`11`	`23`
`11`	`12`	`],`
`12`		`- "output_cls_projection": "False",`
	`13`	`+ "output_cls_projection": false,`
`13`	`14`	`"max_num_tiles": 1,`
`14`	`15`	`"in_channels": 3,`
`15`	`16`	`"intermediate_act": "QuickGELUActivation()"`