Commit b76b17f

qwen3 0.3B fix (#3255)
1 parent fac2f64 commit b76b17f

File tree

fastdeploy/model_executor/layers/linear.py
fastdeploy/model_executor/models/qwen3.py

2 files changed: +20 -29 lines


fastdeploy/model_executor/layers/linear.py

Lines changed: 17 additions & 29 deletions
@@ -266,10 +266,6 @@ def __init__(
         )
 
         self.hidden_size = fd_config.model_config.hidden_size
-        self.weight_shape = [
-            self.input_size,
-            self.output_size,
-        ]
 
         assert self.quant_method is not None
         self.quant_method.create_weights(
@@ -311,24 +307,21 @@ def __init__(
             add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
+        self.fd_config = fd_config
+        self.nranks = fd_config.parallel_config.tensor_parallel_size
+        self.input_size = input_size
+        self.output_size = divide(output_size, self.nranks) # Split the output_size using TP inference.
+        self.hidden_size = fd_config.model_config.hidden_size
+
         super().__init__(
             fd_config=fd_config,
             prefix=prefix,
-            input_size=input_size,
-            output_size=output_size,
+            input_size=self.input_size,
+            output_size=self.output_size,
             with_bias=with_bias,
             add_bias=add_bias,
             skip_quant=skip_quant,
         )
-        self.fd_config = fd_config
-        self.nranks = fd_config.parallel_config.tensor_parallel_size
-        self.input_size = input_size
-        self.output_size = divide(output_size, self.nranks) # Split the output_size using TP inference.
-        self.hidden_size = fd_config.model_config.hidden_size
-        self.weight_shape = [
-            self.input_size,
-            self.output_size,
-        ]
 
         assert self.quant_method is not None
         self.quant_method.create_weights(
@@ -634,15 +627,6 @@ def __init__(
             add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
-        super().__init__(
-            fd_config=fd_config,
-            prefix=prefix,
-            input_size=input_size,
-            output_size=output_size,
-            with_bias=with_bias,
-            add_bias=add_bias,
-            skip_quant=skip_quant,
-        )
         self.fd_config = fd_config
         self.skip_quant = False
         self.nranks = fd_config.parallel_config.tensor_parallel_size
@@ -654,11 +638,15 @@ def __init__(
         self.input_size = divide(input_size, self.nranks)
         self.output_size = output_size
 
-        self.weight_shape = [
-            self.input_size,
-            self.output_size,
-        ]
-        self._dtype = self._helper.get_default_dtype()
+        super().__init__(
+            fd_config=fd_config,
+            prefix=prefix,
+            input_size=self.input_size,
+            output_size=self.output_size,
+            with_bias=with_bias,
+            add_bias=add_bias,
+            skip_quant=skip_quant,
+        )
 
         assert self.quant_method is not None
         self.quant_method.create_weights(
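Taken together, the linear.py hunks reorder initialization: each subclass now computes its tensor-parallel slice of input_size/output_size before delegating to super().__init__(), which is what hands those sizes to quant_method.create_weights(); the subclass-level weight_shape lists become redundant and are dropped. A minimal, self-contained sketch of the pattern — the class and helper names below are illustrative stand-ins, not the actual FastDeploy code:

# Minimal sketch (illustrative names, not the FastDeploy classes): the base
# constructor consumes input_size/output_size immediately, so a subclass must
# finish splitting them across tensor-parallel ranks *before* calling it.

def divide(numerator: int, denominator: int) -> int:
    # Exact split across TP ranks; uneven sizes are a configuration error.
    assert numerator % denominator == 0, "size not divisible by tensor_parallel_size"
    return numerator // denominator

class LinearBase:
    def __init__(self, input_size: int, output_size: int):
        self.input_size = input_size
        self.output_size = output_size
        self.create_weights()  # uses the sizes passed in right now

    def create_weights(self):
        # Stand-in for quant_method.create_weights(...)
        self.weight_shape = [self.input_size, self.output_size]

class ColumnParallelLinear(LinearBase):
    def __init__(self, input_size: int, output_size: int, nranks: int):
        self.nranks = nranks
        # Split the output dimension first, then let the base class allocate
        # the per-rank shard instead of the full weight.
        super().__init__(input_size, divide(output_size, nranks))

layer = ColumnParallelLinear(input_size=1024, output_size=4096, nranks=4)
print(layer.weight_shape)  # [1024, 1024]: the per-rank shard, not [1024, 4096]

Under the old order, the base constructor allocated weights from the full, unsplit sizes, and the subclass's corrected values arrived too late to matter.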

fastdeploy/model_executor/models/qwen3.py

Lines changed: 3 additions & 0 deletions
@@ -286,6 +286,9 @@ def load_weights(self, weights_iterator) -> None:
             weight_loader = getattr(param, "weight_loader", default_weight_loader(self.fd_config))
             weight_loader(param, loaded_weight)
 
+        if self.tie_word_embeddings:
+            self.lm_head.linear.weight.set_value(self.model.embed_tokens.embeddings.weight.transpose([1, 0]))
+
     @paddle.no_grad()
     def set_state_dict(self, state_dict):
         """
