diff --git a/fastdeploy/model_executor/layers/activation.py b/fastdeploy/model_executor/layers/activation.py
index 79fd3b24f6..b2b6fe3a3f 100644
--- a/fastdeploy/model_executor/layers/activation.py
+++ b/fastdeploy/model_executor/layers/activation.py
@@ -164,7 +164,7 @@ def get_act_fn(act_fn_name: str) -> nn.Layer:
     """Get an activation function by name."""
     act_fn_name = act_fn_name.lower()
 
-    if act_fn_name.startswith("paddle.nn.Layer"):
+    if act_fn_name.startswith(("paddle.nn.Layer", "torch.nn.modules")):
         activation_name = act_fn_name.split(".")[-1]
         if activation_name == "identity":
             return nn.Identity()
diff --git a/fastdeploy/model_executor/layers/embeddings.py b/fastdeploy/model_executor/layers/embeddings.py
index 6df196f654..dae25fa081 100644
--- a/fastdeploy/model_executor/layers/embeddings.py
+++ b/fastdeploy/model_executor/layers/embeddings.py
@@ -163,10 +163,8 @@ def __init__(
                     initializer=nn.initializer.Normal(mean=0.0, std=self.initializer_range),
                 ),
             )
-            if self.world_size > 1:
-                set_weight_attrs(self.embeddings.weight, {"output_dim": False})
-                if num_embeddings % self.world_size != 0:
-                    set_weight_attrs(self.embeddings.weight, {"weight_loader", self.weight_loader})
+            set_weight_attrs(self.embeddings.weight, {"output_dim": False})
+            set_weight_attrs(self.embeddings.weight, {"weight_loader": self.weight_loader})
         else:
             # column cut embedding
             self.embeddings = nn.Embedding(
@@ -176,8 +174,7 @@ def __init__(
 
             self.embeddings.weight.is_distributed = True
             self.embeddings.weight.split_axis = 1
-            if self.world_size > 1:
-                set_weight_attrs(self.embeddings.weight, {"output_dim": True})
+            set_weight_attrs(self.embeddings.weight, {"output_dim": True})
 
         self.prefix = prefix
         self.dropout = nn.Dropout(self.hidden_dropout_prob)
diff --git a/fastdeploy/model_executor/models/adapters.py b/fastdeploy/model_executor/models/adapters.py
index 1f2590acdd..fe282a67dc 100644
--- a/fastdeploy/model_executor/models/adapters.py
+++ b/fastdeploy/model_executor/models/adapters.py
@@ -14,6 +14,7 @@
 # limitations under the License.
""" +import os from collections.abc import Iterable from typing import Optional, TypeVar @@ -40,36 +41,29 @@ def _load_dense_weights(linear: nn.Linear, folder: str, model_config: "ModelConf from fastdeploy.model_executor.utils import default_weight_loader filename = "model.safetensors" - file_path = f"{folder}/{filename}" if folder else filename + file_path = f"{model_config.model}/{folder}/{filename}" if folder else filename try: - file_bytes = get_hf_file_to_dict(file_path, model_config.model, model_config.revision) - if not file_bytes: + if not os.path.exists(file_path): return False state_dict = {} - if filename.endswith(".safetensors"): - import io + # only safetensor now + from safetensors.numpy import load_file - from safetensors.numpy import load as load_safetensors - - numpy_tensors = load_safetensors(io.BytesIO(file_bytes)) - for key, numpy_array in numpy_tensors.items(): - state_dict[key] = paddle.to_tensor(numpy_array) - else: - import io - - state_dict = paddle.load(io.BytesIO(file_bytes)) + numpy_tensors = load_file(file_path) + for key, numpy_array in numpy_tensors.items(): + state_dict[key] = paddle.to_tensor(numpy_array) weight_keys = ["weight", "linear.weight", "dense.weight"] for weight_key in weight_keys: if weight_key in state_dict: - weight_loader = getattr(linear.weight, "weight_loader", default_weight_loader) + weight_loader = getattr(linear.weight, "weight_loader", default_weight_loader()) weight_loader(linear.weight, state_dict[weight_key].astype(paddle.float32)) bias_key = weight_key.replace("weight", "bias") if linear.bias is not None and bias_key in state_dict: - bias_loader = getattr(linear.bias, "weight_loader", default_weight_loader) + bias_loader = getattr(linear.bias, "weight_loader", default_weight_loader()) bias_loader(linear.bias, state_dict[bias_key].astype(paddle.float32)) return True except Exception as e: @@ -98,13 +92,15 @@ def _load_st_projector(model_config: "ModelConfig") -> Optional[nn.Layer]: layer_config = get_hf_file_to_dict(config_path, model_config.model, model_config.revision) if not layer_config: continue + bias_attr = paddle.ParamAttr(name="linear_bias", initializer=paddle.nn.initializer.Constant(0)) linear = nn.Linear( - layer_config.get("in_features", 768), - layer_config.get("out_features", 768), - bias=layer_config.get("bias", True), + layer_config.get("in_features", 768), layer_config.get("out_features", 768), bias_attr=bias_attr ) + if linear.weight._is_initialized: + linear.weight.initialize() + if linear.bias._is_initialized: + linear.bias.initialize() linear = linear.astype(paddle.float32) - if not _load_dense_weights(linear, folder, model_config): continue diff --git a/fastdeploy/model_executor/utils.py b/fastdeploy/model_executor/utils.py index 754725691e..0e2f747ed5 100644 --- a/fastdeploy/model_executor/utils.py +++ b/fastdeploy/model_executor/utils.py @@ -153,7 +153,7 @@ def free_tensor(tensor): del tensor -def default_weight_loader(fd_config: FDConfig) -> None: +def default_weight_loader(fd_config: FDConfig = None) -> None: """Default weight loader""" def fn(param, loaded_weight, shard_id: Optional[Union[int, str]] = None): @@ -165,7 +165,7 @@ def fn(param, loaded_weight, shard_id: Optional[Union[int, str]] = None): loaded_weight = get_tensor(loaded_weight) loaded_weight = loaded_weight.transpose([1, 0]) # Tensor parallelism splits the weight along the output_dim - if output_dim is not None and fd_config.parallel_config.tensor_parallel_size > 1: + if output_dim is not None and fd_config is not None and 
fd_config.parallel_config.tensor_parallel_size > 1: dim = -1 if output_dim else 0 if isinstance(loaded_weight, paddle.Tensor): size = loaded_weight.shape[dim]