Changes from all commits (25 commits)
a7794e8  fix(config): add dsv3 tokenizer file (hushenwei2000, Sep 22, 2025)
ed4d341  bugfix: ernie dataset tests (#2627) (Jonathans575, Sep 18, 2025)
a75a7a7  remove flashmask checker (#2631) (WYB27, Sep 18, 2025)
1bae114  【GPT-OSS】update sliding_attention layer use flashmask (#2606) (xiaoguoguo626807, Sep 19, 2025)
e5bd1ab  [Bug] Fix precision of gate and e_score_correction_bias in Glm4Moe (#… (DrownFish19, Sep 19, 2025)
f2d82ad  Add Qwen3 download source (#2638) (Ace-To-HYB, Sep 19, 2025)
fd880dc  [CI] update pytest config (#2620) (Liujie0926, Sep 19, 2025)
c12cd01  examples update yaml training config (#2644) (llbdyiu66, Sep 19, 2025)
ac31b1f  fix save tensor dtype (#2642) (llbdyiu66, Sep 19, 2025)
1f31898  Fix the issue of loading ckpt when retraining (#2645) (Ace-To-HYB, Sep 19, 2025)
ab9273a  feat(MoE Layer): add unified MoE with DeepEP (part 1) (hushenwei2000, Sep 23, 2025)
d13391d  feat(MoE Layer): add unified MoE with DeepEP (part 1) (hushenwei2000, Sep 24, 2025)
996421f  feat(MoE Layer): add unified MoE with DeepEP (part 3) (hushenwei2000, Sep 25, 2025)
3851143  feat(MoE Layer): add unified MoE with DeepEP (hushenwei2000, Sep 25, 2025)
2c290ac  feat(MoE Layer): add unified MoE with DeepEP (hushenwei2000, Sep 26, 2025)
503cfe2  feat(MoE Layer): add unified MoE with DeepEP (hushenwei2000, Sep 28, 2025)
6477ade  fix(config): add dsv3 tokenizer file (hushenwei2000, Sep 22, 2025)
df51bbc  Merge branch 'develop' into add_moe_ep (hushenwei2000, Sep 28, 2025)
6b20779  Merge branch 'develop' into add_moe_ep (hushenwei2000, Sep 28, 2025)
1dcee3e  Merge branch 'develop' into add_moe_ep (hushenwei2000, Sep 28, 2025)
80e0999  Format code (hushenwei2000, Sep 28, 2025)
c64d4b8  Format code (hushenwei2000, Sep 28, 2025)
e94098b  Add customized loss system (hushenwei2000, Sep 29, 2025)
a7b3e09  feat _probs_drop_policy (hushenwei2000, Sep 30, 2025)
093748d  fix _forward_traditional_moe method (hushenwei2000, Sep 30, 2025)
41 changes: 41 additions & 0 deletions paddleformers/nn/moe_deepep/__init__.py
@@ -0,0 +1,41 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from typing import TYPE_CHECKING

from ...utils.lazy_import import _LazyModule

import_structure = {
    "modular_moe_layer": ["ModularMoELayer"],
    "moe_communication": ["MoECommunicationInterface", "StandardMoECommunication", "DeepEPMoECommunication"],
    "moe_expert": ["MoEExpertInterface", "StandardMoEExpert", "Qwen2MLP"],
    "moe_gate": ["PretrainedMoEGate"],
    "moe_factory": ["QuickAccessMoEFactory"],
}

if TYPE_CHECKING:
    from .modular_moe_layer import *
    from .moe_communication import *
    from .moe_expert import *
    from .moe_factory import *
    from .moe_gate import *
else:
    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()["__file__"],
        import_structure,
        module_spec=__spec__,
    )
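Note on the pattern above: importing the package stays cheap because `_LazyModule` replaces the package entry in `sys.modules` and only imports a submodule the first time one of the names listed in `import_structure` is accessed, while the `TYPE_CHECKING` branch keeps eager wildcard imports visible to static type checkers. A minimal usage sketch, assuming the PaddleFormers `_LazyModule` resolves attributes on demand in the usual Hugging Face-style fashion (the `print` output is illustrative):

    # Importing the package does not yet import any MoE submodule.
    from paddleformers.nn import moe_deepep

    # First attribute access makes _LazyModule import
    # paddleformers/nn/moe_deepep/modular_moe_layer.py and return the class.
    layer_cls = moe_deepep.ModularMoELayer
    print(layer_cls.__module__)  # paddleformers.nn.moe_deepep.modular_moe_layer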