@@ -175,6 +175,16 @@ def _build_model(self, device: DeviceLikeType) -> nn.Module:
         model.post_init()
 
         # if present, initialize sharding config. We need head_dim for colwise sharding.
+        self._set_sharding_config(model_config)
+
+        # patch forward method
+        model.forward = types.MethodType(self._simple_forward, model)
+
+        model.eval()
+        return model
+
+    def _set_sharding_config(self, model_config: PretrainedConfig):
+        """Set the sharding config for the model."""
         self._sharding_config = {}
         self._sharding_config["head_dim"] = 1
         if hasattr(model_config, "base_model_tp_plan"):
@@ -183,30 +193,6 @@ def _build_model(self, device: DeviceLikeType) -> nn.Module:
             self._sharding_config["head_dim"] = model_config.head_dim
         if hasattr(model_config, "num_hidden_layers"):
             self._sharding_config["num_hidden_layers"] = model_config.num_hidden_layers
-        # if it is a multi-modal factory, overwrite the sharding config with the
-        # dedicated sub-configs
-        if hasattr(model_config, "sub_configs") and len(model_config.sub_configs) > 0:
-            # for image-text-to-text models, we only support sharding for the text sub-config
-            if isinstance(self, AutoModelForImageTextToTextFactory):
-                text_config = model_config.sub_configs["text_config"]
-                # if text_config is a class, instantiate it
-                if isinstance(text_config, type):
-                    text_config = text_config()
-                if hasattr(text_config, "base_model_tp_plan"):
-                    self._sharding_config["tp_plan"] = text_config.base_model_tp_plan
-                if hasattr(text_config, "head_dim"):
-                    self._sharding_config["head_dim"] = text_config.head_dim
-                if hasattr(text_config, "num_hidden_layers"):
-                    self._sharding_config["num_hidden_layers"] = text_config.num_hidden_layers
-            else:
-                # TODO: support sharding for other multi-modal models
-                pass
-
-        # patch forward method
-        model.forward = types.MethodType(self._simple_forward, model)
-
-        model.eval()
-        return model
 
     def get_sharding_config(self):
         return self._sharding_config or {}
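The tail of `_build_model` binds `self._simple_forward` onto the model instance with `types.MethodType`. A minimal, self-contained sketch of that binding mechanism, using hypothetical stand-in names (`TinyModel`, `_patched_forward`) rather than the factory's real code:

```python
import types

import torch
from torch import nn


class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4)

    def forward(self, x):
        return self.linear(x)


def _patched_forward(self, x):
    # Once bound via MethodType, `self` is the model instance.
    return self.linear(x) * 2.0


model = TinyModel()
# Bind the plain function as a method on this instance only; other
# TinyModel instances keep the class-level forward.
model.forward = types.MethodType(_patched_forward, model)
out = model.forward(torch.ones(2, 4))
print(out.shape)  # torch.Size([2, 4])
```

Because the binding is an instance attribute, each factory-built model carries its own patched `forward` while the class definition stays untouched.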
@@ -394,6 +380,20 @@ def _get_max_position_embeddings_config(self) -> Dict[str, Any]:
             },
         }
 
+    def _set_sharding_config(self, model_config: PretrainedConfig):
+        """Set the sharding config for the model."""
+        self._sharding_config = {}
+        text_config = model_config.sub_configs["text_config"]
+        # if text_config is a class, instantiate it
+        if isinstance(text_config, type):
+            text_config = text_config()
+        if hasattr(text_config, "base_model_tp_plan"):
+            self._sharding_config["tp_plan"] = text_config.base_model_tp_plan
+        if hasattr(text_config, "head_dim"):
+            self._sharding_config["head_dim"] = text_config.head_dim
+        if hasattr(text_config, "num_hidden_layers"):
+            self._sharding_config["num_hidden_layers"] = text_config.num_hidden_layers
+
     @property
     def automodel_from_config(self):
         return AutoModelForImageTextToText.from_config
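Taken together, the hunks replace the `isinstance(self, AutoModelForImageTextToTextFactory)` branch with ordinary method overriding: `_build_model` calls `self._set_sharding_config(...)`, so the subclass hook wins automatically. A hedged sketch of that dispatch; the classes below are reduced stand-ins for illustration, not the real factory implementations:

```python
from types import SimpleNamespace


class AutoModelFactory:
    """Reduced stand-in for the base factory; only the dispatch is shown."""

    def _set_sharding_config(self, model_config):
        # Default path: read sharding hints straight off the top-level config.
        self._sharding_config = {"head_dim": 1}
        if hasattr(model_config, "head_dim"):
            self._sharding_config["head_dim"] = model_config.head_dim

    def build(self, model_config):
        # Calling through `self` picks up a subclass override without any
        # isinstance() branching in the base class.
        self._set_sharding_config(model_config)
        return self._sharding_config


class AutoModelForImageTextToTextFactory(AutoModelFactory):
    def _set_sharding_config(self, model_config):
        # Image-text-to-text models: shard based on the text sub-config only.
        self._sharding_config = {}
        text_config = model_config.sub_configs["text_config"]
        if isinstance(text_config, type):  # config classes get instantiated
            text_config = text_config()
        if hasattr(text_config, "head_dim"):
            self._sharding_config["head_dim"] = text_config.head_dim


cfg = SimpleNamespace(sub_configs={"text_config": SimpleNamespace(head_dim=128)})
print(AutoModelForImageTextToTextFactory().build(cfg))  # {'head_dim': 128}
```

Moving the multi-modal special case into the subclass also removes the base class's compile-time dependency on `AutoModelForImageTextToTextFactory`, keeping each factory responsible for its own sharding rules.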