5 changes: 5 additions & 0 deletions keras_hub/src/models/qwen3_moe/__init__.py
@@ -0,0 +1,5 @@
from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone
from keras_hub.src.models.qwen3_moe.qwen3_moe_presets import backbone_presets
from keras_hub.src.utils.preset_utils import register_presets

register_presets(backbone_presets, Qwen3MoeBackbone)
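Once the presets are registered, the backbone can be constructed by preset name through the standard KerasHub `from_preset` API. A minimal sketch of that usage, assuming the `Qwen3MoeBackbone` symbol is exported under `keras_hub.models` as other backbones are (not shown in this diff):

import keras_hub

# The register_presets() call above makes this preset name resolvable.
backbone = keras_hub.models.Qwen3MoeBackbone.from_preset(
    "qwen3_moe_30b_a3b_en"
)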
30 changes: 30 additions & 0 deletions keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py
@@ -0,0 +1,30 @@
"""Qwen3 MoE model preset configurations."""

backbone_presets = {
    "qwen3_moe_30b_a3b_en": {
        "metadata": {
            "description": (
                "Mixture-of-Experts (MoE) model with 30.5 billion total"
                " parameters and 3.3 billion activated, built on 48 layers"
                " with 32 query and 4 key/value attention heads and"
                " 128 experts (8 active)."
            ),
            "params": 30532122624,
            "path": "qwen3_moe",
        },
        "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_30b_a3b_en/2",
    },
    "qwen3_moe_235b_a22b_en": {
        "metadata": {
            "description": (
                "Mixture-of-Experts (MoE) model with 235 billion total"
                " parameters and 22 billion activated, built on 94 layers"
                " with 64 query and 4 key/value attention heads and"
                " 128 experts (8 active)."
            ),
            "params": 235093634560,
            "path": "qwen3_moe",
        },
        "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_235b_a22b_en/1",
    },
}
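Given the parameter counts in the metadata above, loading these checkpoints at full precision is impractical on most hardware. A hedged sketch of loading at reduced precision via the `dtype` argument that KerasHub's `from_preset` accepts (an illustration, not part of this diff):

import keras_hub

# Load the larger preset in bfloat16 to roughly halve memory use
# relative to float32. Weights are fetched from the Kaggle handle
# declared in the preset configuration.
backbone = keras_hub.models.Qwen3MoeBackbone.from_preset(
    "qwen3_moe_235b_a22b_en",
    dtype="bfloat16",
)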