2 changes: 1 addition & 1 deletion fastdeploy/model_executor/layers/activation.py
@@ -164,7 +164,7 @@ def get_act_fn(act_fn_name: str) -> nn.Layer:
"""Get an activation function by name."""
act_fn_name = act_fn_name.lower()

if act_fn_name.startswith("paddle.nn.Layer"):
if act_fn_name.startswith(("paddle.nn.Layer", "torch.nn.modules")):
activation_name = act_fn_name.split(".")[-1]
if activation_name == "identity":
return nn.Identity()
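For reference, the branch above means torch-style class paths now resolve the same way paddle-style ones do: the name is lowercased and reduced to its final segment before lookup. A minimal usage sketch (it assumes the rest of `get_act_fn` is exactly as shown in this hunk; the import path comes from the file header):

```python
from fastdeploy.model_executor.layers.activation import get_act_fn

# "torch.nn.modules.linear.Identity" is lowercased, matches the new
# "torch.nn.modules" prefix, and resolves by its last segment ("identity").
act = get_act_fn("torch.nn.modules.linear.Identity")  # returns paddle.nn.Identity()
```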
9 changes: 3 additions & 6 deletions fastdeploy/model_executor/layers/embeddings.py
@@ -163,10 +163,8 @@ def __init__(
initializer=nn.initializer.Normal(mean=0.0, std=self.initializer_range),
),
)
if self.world_size > 1:
set_weight_attrs(self.embeddings.weight, {"output_dim": False})
if num_embeddings % self.world_size != 0:
set_weight_attrs(self.embeddings.weight, {"weight_loader", self.weight_loader})
set_weight_attrs(self.embeddings.weight, {"output_dim": False})
set_weight_attrs(self.embeddings.weight, {"weight_loader": self.weight_loader})
else:
# column cut embedding
self.embeddings = nn.Embedding(
@@ -176,8 +174,7 @@

self.embeddings.weight.is_distributed = True
self.embeddings.weight.split_axis = 1
if self.world_size > 1:
set_weight_attrs(self.embeddings.weight, {"output_dim": True})
set_weight_attrs(self.embeddings.weight, {"output_dim": True})

self.prefix = prefix
self.dropout = nn.Dropout(self.hidden_dropout_prob)
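The new lines above attach the `output_dim` and `weight_loader` attributes to the embedding weight unconditionally instead of only when `self.world_size > 1`, and they replace the earlier set literal `{"weight_loader", self.weight_loader}`, which is not a key/value mapping, with a proper dict. A simplified sketch of the attribute-attachment pattern this relies on (hypothetical helper; the real `set_weight_attrs` lives elsewhere in the codebase and may do more than this):

```python
def set_weight_attrs_sketch(param, attrs: dict) -> None:
    """Simplified stand-in: attach loader metadata to a parameter for later lookup."""
    for key, value in attrs.items():
        setattr(param, key, value)  # e.g. param.output_dim, param.weight_loader


# During checkpoint loading, code such as adapters.py can then recover the loader:
#   loader = getattr(param, "weight_loader", default_weight_loader())
#   loader(param, checkpoint_tensor)
```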
36 changes: 16 additions & 20 deletions fastdeploy/model_executor/models/adapters.py
@@ -14,6 +14,7 @@
# limitations under the License.
"""

import os
from collections.abc import Iterable
from typing import Optional, TypeVar

@@ -40,36 +41,29 @@ def _load_dense_weights(linear: nn.Linear, folder: str, model_config: "ModelConf
from fastdeploy.model_executor.utils import default_weight_loader

filename = "model.safetensors"
file_path = f"{folder}/{filename}" if folder else filename
file_path = f"{model_config.model}/{folder}/{filename}" if folder else filename

try:
file_bytes = get_hf_file_to_dict(file_path, model_config.model, model_config.revision)
if not file_bytes:
if not os.path.exists(file_path):
return False

state_dict = {}
if filename.endswith(".safetensors"):
import io
# only safetensor now
from safetensors.numpy import load_file

from safetensors.numpy import load as load_safetensors

numpy_tensors = load_safetensors(io.BytesIO(file_bytes))
for key, numpy_array in numpy_tensors.items():
state_dict[key] = paddle.to_tensor(numpy_array)
else:
import io

state_dict = paddle.load(io.BytesIO(file_bytes))
numpy_tensors = load_file(file_path)
for key, numpy_array in numpy_tensors.items():
state_dict[key] = paddle.to_tensor(numpy_array)

weight_keys = ["weight", "linear.weight", "dense.weight"]

for weight_key in weight_keys:
if weight_key in state_dict:
weight_loader = getattr(linear.weight, "weight_loader", default_weight_loader)
weight_loader = getattr(linear.weight, "weight_loader", default_weight_loader())
weight_loader(linear.weight, state_dict[weight_key].astype(paddle.float32))
bias_key = weight_key.replace("weight", "bias")
if linear.bias is not None and bias_key in state_dict:
bias_loader = getattr(linear.bias, "weight_loader", default_weight_loader)
bias_loader = getattr(linear.bias, "weight_loader", default_weight_loader())
bias_loader(linear.bias, state_dict[bias_key].astype(paddle.float32))
return True
except Exception as e:
@@ -98,13 +92,15 @@ def _load_st_projector(model_config: "ModelConfig") -> Optional[nn.Layer]:
layer_config = get_hf_file_to_dict(config_path, model_config.model, model_config.revision)
if not layer_config:
continue
bias_attr = paddle.ParamAttr(name="linear_bias", initializer=paddle.nn.initializer.Constant(0))
linear = nn.Linear(
layer_config.get("in_features", 768),
layer_config.get("out_features", 768),
bias=layer_config.get("bias", True),
layer_config.get("in_features", 768), layer_config.get("out_features", 768), bias_attr=bias_attr
)
if linear.weight._is_initialized:
linear.weight.initialize()
if linear.bias._is_initialized:
linear.bias.initialize()
linear = linear.astype(paddle.float32)

if not _load_dense_weights(linear, folder, model_config):
continue

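Summarizing the new loading path above: `_load_dense_weights` now resolves `model.safetensors` under the local model directory, bails out early if the file is absent, reads it with `safetensors.numpy.load_file`, and converts the arrays to Paddle tensors before handing them to the now argument-less `default_weight_loader()`. A condensed sketch of that flow under the same assumptions (hypothetical function and parameter names; error handling and the projector wiring in `_load_st_projector` are omitted):

```python
import os

import paddle
from safetensors.numpy import load_file


def load_dense_state_dict(model_dir: str, folder: str) -> dict:
    """Condensed version of the local safetensors path used above."""
    filename = "model.safetensors"
    # Same ternary as the diff: with no sub-folder, the bare filename is used.
    file_path = f"{model_dir}/{folder}/{filename}" if folder else filename
    if not os.path.exists(file_path):
        return {}
    # safetensors yields numpy arrays; convert them to paddle tensors.
    return {key: paddle.to_tensor(arr) for key, arr in load_file(file_path).items()}
```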
4 changes: 2 additions & 2 deletions fastdeploy/model_executor/utils.py
@@ -153,7 +153,7 @@ def free_tensor(tensor):
del tensor


def default_weight_loader(fd_config: FDConfig) -> None:
def default_weight_loader(fd_config: FDConfig = None) -> None:
"""Default weight loader"""

def fn(param, loaded_weight, shard_id: Optional[Union[int, str]] = None):
@@ -165,7 +165,7 @@ def fn(param, loaded_weight, shard_id: Optional[Union[int, str]] = None):
loaded_weight = get_tensor(loaded_weight)
loaded_weight = loaded_weight.transpose([1, 0])
# Tensor parallelism splits the weight along the output_dim
if output_dim is not None and fd_config.parallel_config.tensor_parallel_size > 1:
if output_dim is not None and fd_config is not None and fd_config.parallel_config.tensor_parallel_size > 1:
dim = -1 if output_dim else 0
if isinstance(loaded_weight, paddle.Tensor):
size = loaded_weight.shape[dim]
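The signature change above makes `fd_config` optional, and the added `fd_config is not None` check means the tensor-parallel split is simply skipped when no config is supplied, which is what lets `adapters.py` fall back to `default_weight_loader()` with no arguments. A minimal sketch of the relaxed guard (illustrative only, not the full loader):

```python
def should_split_for_tp(output_dim, fd_config) -> bool:
    """Mirror of the updated condition: no config means no tensor-parallel split."""
    return (
        output_dim is not None
        and fd_config is not None
        and fd_config.parallel_config.tensor_parallel_size > 1
    )
```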