Skip to content

Commit 9de7619

Browse files
linfeng-yuan and wangxiyuan
authored and committed
Fix attribute error with torchair and shape mismatch in eager mode for deepseek_r1
Signed-off-by: linfeng-yuan <[email protected]> Signed-off-by: wangxiyuan <[email protected]>
1 parent 259ebae commit 9de7619

File tree

2 files changed

+8
-12
lines changed

2 files changed

+8
-12
lines changed

vllm_ascend/models/deepseek_v2.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,7 @@ def __init__(
153153
self.q_lora_rank,
154154
bias=False,
155155
quant_config=quant_config,
156-
prefix=f"{prefix}.q_a_proj",
157-
return_bias=False,
156+
prefix=f"{prefix}.q_a_proj"
158157
)
159158
self.q_a_layernorm = RMSNorm(self.q_lora_rank,
160159
eps=config.rms_norm_eps)
@@ -163,26 +162,23 @@ def __init__(
163162
self.num_heads * self.qk_head_dim,
164163
bias=False,
165164
quant_config=quant_config,
166-
prefix=f"{prefix}.q_b_proj",
167-
return_bias=False,
165+
prefix=f"{prefix}.q_b_proj"
168166
)
169167
else:
170168
self.q_proj = ColumnParallelLinear(
171169
self.hidden_size,
172170
self.num_heads * self.qk_head_dim,
173171
bias=False,
174172
quant_config=quant_config,
175-
prefix=f"{prefix}.q_proj",
176-
return_bias=False,
173+
prefix=f"{prefix}.q_proj"
177174
)
178175

179176
self.kv_a_proj_with_mqa = ReplicatedLinear(
180177
self.hidden_size,
181178
self.kv_lora_rank + self.qk_rope_head_dim,
182179
bias=False,
183180
quant_config=quant_config,
184-
prefix=f"{prefix}.kv_a_proj_with_mqa",
185-
return_bias=False,
181+
prefix=f"{prefix}.kv_a_proj_with_mqa"
186182
)
187183
self.kv_a_layernorm = RMSNorm(self.kv_lora_rank,
188184
eps=config.rms_norm_eps)
@@ -191,16 +187,14 @@ def __init__(
191187
self.num_heads * (self.qk_nope_head_dim + self.v_head_dim),
192188
bias=False,
193189
quant_config=quant_config,
194-
prefix=f"{prefix}.kv_b_proj",
195-
return_bias=False,
190+
prefix=f"{prefix}.kv_b_proj"
196191
)
197192
self.o_proj = CustomDeepseekV2RowParallelLinear(
198193
self.num_heads * self.v_head_dim,
199194
self.hidden_size,
200195
bias=False,
201196
quant_config=quant_config,
202-
prefix=f"{prefix}.o_proj",
203-
return_bias=False,
197+
prefix=f"{prefix}.o_proj"
204198
)
205199

206200
if rope_scaling:

vllm_ascend/torchair/models/torchair_deepseek_v2.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,7 @@ def __init__(
876876
self.tp_size = get_tensor_model_parallel_world_size()
877877
self.tp_rank = get_tp_group().rank_in_group
878878
ascend_config = get_ascend_config()
879+
self.use_mla = False
879880
self.use_sfa = False
880881
# TODO: enable mla in vllm-ascend
881882
if model_config.use_mla:
@@ -884,6 +885,7 @@ def __init__(
884885
self.use_sfa = True
885886
else:
886887
attn_cls = TorchairDeepseekV2MLAAttention # type: ignore[assignment]
888+
self.use_mla = True
887889
else:
888890
attn_cls = DeepseekV2Attention
889891
self.self_attn = attn_cls(

0 commit comments

Comments
 (0)