2 files changed in keras_hub/src/models/qwen3_moe: +35 -0 lines
+ from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone
+ from keras_hub.src.models.qwen3_moe.qwen3_moe_presets import backbone_presets
+ from keras_hub.src.utils.preset_utils import register_presets
+
+ register_presets(backbone_presets, Qwen3MoeBackbone)
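(Not part of the diff.) For context, registering the presets is what makes the names defined in the second file resolvable by KerasHub's standard from_preset loading path. A minimal sketch of how that is typically exercised, assuming the weights are reachable through the registered kaggle_handle and using the backbone class imported in this file:

from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone

# After the registration above, the preset name resolves to its Kaggle handle
# and the weights are downloaded and cached on first use.
backbone = Qwen3MoeBackbone.from_preset("qwen3_moe_30b_a3b_en")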
+ """Qwen3 MoE model preset configurations."""
+
+ backbone_presets = {
+     "qwen3_moe_30b_a3b_en": {
+         "metadata": {
+             "description": (
+                 "Mixture-of-Experts (MoE) model with 30.5 billion total"
+                 " parameters (3.3 billion activated), built on 48 layers"
+                 " with 32 query and 4 key/value attention heads and"
+                 " 128 experts (8 active)."
+             ),
+             "params": 30532122624,
+             "path": "qwen3_moe",
+         },
+         "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_30b_a3b_en/2",
+     },
+     "qwen3_moe_235b_a22b_en": {
+         "metadata": {
+             "description": (
+                 "Mixture-of-Experts (MoE) model with 235 billion total"
+                 " parameters (22 billion activated), built on 94 layers"
+                 " with 64 query and 4 key/value attention heads and"
+                 " 128 experts (8 active)."
+             ),
+             "params": 235093634560,
+             "path": "qwen3_moe",
+         },
+         "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_235b_a22b_en/1",
+     },
+ }
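(Not part of the diff.) Since backbone_presets is a plain dict, a quick way to sanity-check the registered entries is to walk it directly; a minimal sketch, assuming the presets module added above is importable:

from keras_hub.src.models.qwen3_moe.qwen3_moe_presets import backbone_presets

# Print each preset's parameter count (in billions) and its Kaggle handle.
for name, preset in backbone_presets.items():
    billions = preset["metadata"]["params"] / 1e9
    print(f"{name}: {billions:.1f}B params -> {preset['kaggle_handle']}")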