5 changes: 5 additions & 0 deletions keras_hub/src/models/qwen3_moe/__init__.py
@@ -0,0 +1,5 @@
from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone
from keras_hub.src.models.qwen3_moe.qwen3_moe_presets import backbone_presets
from keras_hub.src.utils.preset_utils import register_presets

register_presets(backbone_presets, Qwen3MoeBackbone)
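
For context on how this registration is consumed downstream, here is a minimal sketch (not part of this PR) of instantiating the new preset through the standard KerasHub from_preset API; load_weights=False is used only so the architecture is built without downloading the checkpoint from Kaggle.

from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone

# Build the backbone from the preset registered above. load_weights=False
# constructs the model from the preset config without fetching the full
# multi-gigabyte checkpoint.
backbone = Qwen3MoeBackbone.from_preset(
    "qwen3_moe_30b_a3b_en",
    load_weights=False,
)
print(backbone.count_params())  # should roughly match the "params" metadata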
30 changes: 30 additions & 0 deletions keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py
@@ -0,0 +1,30 @@
"""Qwen3 model preset configurations."""

backbone_presets = {
"qwen3_moe_30b_a3b_en": {
"metadata": {
"description": (
"Mixture-of-Experts (MoE) model has 30.5 billion total parameters"
" with 3.3 billion activated, built on 48 layers, and utilizes 32 query "
"and 4 key/value attention heads with 128 experts (8 active)"
"efficiency and fast inference on resource-constrained devices."
),
"params": 30532122624,
"path": "qwen3_moe",
},
"kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_30b_a3b_en/2",
},
"qweqwen3_moe_235b_a22b_en": {
"metadata": {
"description": (
"Mixture-of-Experts (MoE) model has 235 billion total parameters,"
" with 22 billion activated, built on 94 layers, and utilizes 64 query "
"and 4 key/value attention heads with 128 experts (8 active)."
),
"params": 235093634560,
"path": "qwen3_moe",
},
"kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_235b_a22b_en/1",
},
}
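
As a quick sanity check (a sketch, not part of the diff): after register_presets() runs in __init__.py above, both entries defined in this file should show up on the backbone's preset registry.

from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone

# Assumption: the `presets` classproperty lists all registered built-in presets.
print(list(Qwen3MoeBackbone.presets.keys()))
# Expected to include: 'qwen3_moe_30b_a3b_en' and 'qwen3_moe_235b_a22b_en'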