Skip to content

Commit 7799017

Browse files
committed
debug
1 parent 31b4061 commit 7799017

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

csrc/deepep/deep_ep.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -797,6 +797,15 @@ Buffer::internode_dispatch(
797 797
recv_data, token_server_idx, token_unique_per_server, ep_rank_token_cnt, recv_tokens_per_expert,
798 798
src_offset_rank_token_idx, dst_offset_rank_token_idx, token_idx_per_expert, offset_inner, count_outer, expand_idx,
799 799
total_recv_token);
800+
at::Tensor cpu = token_idx_per_expert.cpu();
801+
auto acc = cpu.accessor<int, 2>();
802+
803+
for (int i = 0; i < cpu.size(0); ++i) {
804+
for (int j = 0; j < cpu.size(1); ++j) {
805+
std::cout << acc[i][j] << " ";
806+
}
807+
std::cout << std::endl;
808+
}
800 809

801 810
int total_count = total_recv_token.item<int>();
802 811
int num_recv_tokens = (total_count == 0) ? 1 : total_count;

csrc/deepep/ops2/op_kernel/a2/moe_distribute_dispatch_a2_pipeline.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -527,11 +527,9 @@ __aicore__ inline void MoeDistributeDispatchA2Pipeline<TemplateMC2TypeA2Pipeline
527 527
SyncFunc<AscendC::HardEvent::S_MTE3>();
528 528
DataCopyPad(dstRankRecvRingU8Tensor[hccsItemNum * localDstRank + tokenStructLen_ * index], tokenStructInRdmaTensor_,
529 529
tokenStructParams);
530-
DataCopyPad(rdmaRecvRingU8Tensor_[(i * rdmaItemNum + rdmaHead) * tokenStructLen_], tokenStructInRdmaTensor_,
531-
tokenStructParams);
532 530
rdmaHead = (rdmaHead + 1) % rdmaItemNum;
533-
hcclTail = (hcclTail + 1) % hccsItemNum;
534 531
rdmaHeadTailTensor_.SetValue(i * RING_BUFFER_HEAD_TAIL + 2, rdmaHead);
532+
hcclTail = (hcclTail + 1) % hccsItemNum;
535 533
localHccsHeadTailTensor.SetValue(1, hcclTail);
536 534
DataCopy(globalHccsHeadTailTensor[localRankId], localHccsHeadTailTensor, hccsHesdTailParams);
537 535
}

0 commit comments

Comments
 (0)