
Commit 7a21bd7

Author: tanqingshan (A)
Commit message: bugfix
Signed-off-by: tanqingshan (A) <50050625@china.huawei.com>
1 parent da84eb2, commit 7a21bd7

File tree: 3 files changed (+23, -10 lines)

vllm_ascend/eplb/adaptor/vllm_adaptor.py

Lines changed: 17 additions & 3 deletions
@@ -221,7 +221,13 @@ def _export_tensor_to_file(self, expert_maps, expert_map_record_path: str):
             json.dump(record, f, indent=4)
 
     def do_update_expert_map(self, layer_id, updated_expert_map):
-        self.expert_map_per_layer[layer_id].copy_(updated_expert_map)
+        pad_len = self.expert_map_per_layer[layer_id].shape[
+            0] - updated_expert_map.shape[0]
+        updated_expert_map_padded = torch.nn.functional.pad(updated_expert_map,
+                                                            pad=(0, pad_len),
+                                                            mode='constant',
+                                                            value=-1)
+        self.expert_map_per_layer[layer_id].copy_(updated_expert_map_padded)
         self.expert_map_per_layer_cpu[layer_id].copy_(updated_expert_map)
 
     def do_update_expert_weight(self, layer_id, local_expert_to_replace,
@@ -234,7 +240,15 @@ def do_update_expert_weight(self, layer_id, local_expert_to_replace,
 
     def do_update_log2phy_map(self, layer_id, updated_log2phy_map):
         if self.log2phy_map_per_layer[layer_id] is not None:
-            self.log2phy_map_per_layer[layer_id].copy_(updated_log2phy_map)
+            pad_len = self.log2phy_map_per_layer[layer_id].shape[
+                0] - updated_log2phy_map.shape[0]
+            updated_log2phy_map_padded = torch.nn.functional.pad(
+                updated_log2phy_map,
+                pad=(0, pad_len),
+                mode='constant',
+                value=-1)
+            self.log2phy_map_per_layer[layer_id].copy_(
+                updated_log2phy_map_padded)
 
     def global2local(self, placement: torch.Tensor,
                      E_local: int) -> torch.Tensor:
@@ -313,4 +327,4 @@ def determine_expert_map_all(self):
             expert_map_all[:, r, start:end] = local_ids.unsqueeze(0).expand(
                 self.num_moe_layers, -1)
 
-        return expert_map_all
+        return expert_map_all
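Why the new padding is needed (an inference from the hunk, not stated in the commit): Tensor.copy_ requires source and destination shapes to match, so an updated map that covers fewer entries than the fixed-size per-layer buffer is right-padded with -1 before the copy. A minimal, self-contained sketch of the behavior; the buffer length of 8, the map values, and the variable names below are made up for illustration:

    import torch
    import torch.nn.functional as F

    # Hypothetical stand-in for self.expert_map_per_layer[layer_id]:
    # a fixed-size buffer where -1 marks "no expert in this slot".
    expert_map_buffer = torch.full((8,), -1, dtype=torch.int64)

    # An updated map shorter than the buffer (values are illustrative).
    updated_expert_map = torch.tensor([2, 0, 1, 3, 4])

    pad_len = expert_map_buffer.shape[0] - updated_expert_map.shape[0]
    padded = F.pad(updated_expert_map, pad=(0, pad_len),
                   mode='constant', value=-1)

    expert_map_buffer.copy_(padded)  # copy_ needs matching shapes, hence the pad
    print(expert_map_buffer)         # tensor([ 2,  0,  1,  3,  4, -1, -1, -1])

The same pattern is applied to the log2phy map in do_update_log2phy_map; note that only the device-side buffer receives the padded tensor, while expert_map_per_layer_cpu still gets the unpadded map.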

vllm_ascend/eplb/core/eplb_device_transfer_loader.py

Lines changed: 1 addition & 5 deletions
@@ -50,10 +50,6 @@ def generate_expert_d2d_transfer_task(self, expert_send_info,
             )
             return
 
-        # If neither send nor receive task is needed for this layer on this rank, return
-        if not (expert_send_info or expert_recv_info):
-            return
-
         self.updated_expert_map = updated_expert_map
 
         self.layer_id = layer_id
@@ -135,4 +131,4 @@ def update_expert_map_and_weight(self, reqs):
         self.state = ExpertWeightUpdateState.WAITING
 
     def load_impl(self, old_expert_table, new_expert_table):
-        raise NotImplementedError
+        raise NotImplementedError
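A possible reading of this deletion (hedged, since the commit message says only "bugfix"): a rank with no experts to send or receive must still record updated_expert_map and layer_id so its routing state keeps up with ranks that did move weights, and the early return skipped exactly that bookkeeping. An illustrative sketch with made-up names, not the repository's code:

    # Illustrative only: the map/layer bookkeeping must run even on ranks
    # that have no transfer work for this layer.
    def apply_layer_update(state, send_info, recv_info, updated_map, layer_id):
        # Removed guard:
        #     if not (send_info or recv_info):
        #         return
        # It bailed out before the assignments below, leaving idle ranks
        # with a stale expert map.
        state.updated_expert_map = updated_map
        state.layer_id = layer_id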

vllm_ascend/ops/fused_moe/moe_mlp.py

Lines changed: 5 additions & 2 deletions
@@ -127,14 +127,17 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
     if quantized_hidden_states is not None:
         dispose_tensor(quantized_hidden_states)
     # act_fn: swiglu
+    group_diff = torch.diff(group_list, dim=0)
+    new_group = torch.cat([group_list[0].unsqueeze(0), group_diff],
+                          dim=0)
     hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
         x=hidden_states,
         weight_scale=w1_scale,
         activation_scale=pertoken_scale,
         bias=None,
         quant_scale=None,
         quant_offset=None,
-        group_index=group_list,
+        group_index=new_group,
         activate_left=True,
         quant_mode=1,
     )
@@ -295,4 +298,4 @@ def unified_apply_mlp(hidden_states: torch.Tensor,
         group_list=group_list,
         group_list_type=group_list_type,
         topk_scales=topk_scales,
-        need_trans=need_trans)
+        need_trans=need_trans)
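What the two inserted lines compute is verifiable in plain PyTorch: if group_list holds cumulative per-expert token offsets, torch.diff plus the first element recovers the individual per-group counts. That npu_dequant_swiglu_quant's group_index expects counts rather than cumulative offsets here is an inference from the change, not something the commit documents:

    import torch

    # group_list as cumulative token offsets: experts hold 3, 4 and 5 tokens.
    group_list = torch.tensor([3, 7, 12])

    group_diff = torch.diff(group_list, dim=0)          # tensor([4, 5])
    new_group = torch.cat([group_list[0].unsqueeze(0),
                           group_diff], dim=0)          # tensor([3, 4, 5])

    assert int(new_group.sum()) == int(group_list[-1])  # counts sum to the total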
