From e79d0dde4a8710e76c76adfa7476a0c48f5c4cc9 Mon Sep 17 00:00:00 2001 From: Laxma Reddy Patlolla Date: Wed, 8 Oct 2025 16:22:17 -0700 Subject: [PATCH 1/4] Register and add the qwen3_moe presets to hub --- keras_hub/src/models/qwen3_moe/__init__.py | 5 ++++ .../src/models/qwen3_moe/qwen3_moe_presets.py | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 keras_hub/src/models/qwen3_moe/__init__.py create mode 100644 keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py diff --git a/keras_hub/src/models/qwen3_moe/__init__.py b/keras_hub/src/models/qwen3_moe/__init__.py new file mode 100644 index 0000000000..65619bdb77 --- /dev/null +++ b/keras_hub/src/models/qwen3_moe/__init__.py @@ -0,0 +1,5 @@ +from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import Qwen3MoeBackbone +from keras_hub.src.models.qwen3_moe.qwen3_moe_presets import backbone_presets +from keras_hub.src.utils.preset_utils import register_presets + +register_presets(backbone_presets, Qwen3MoeBackbone) diff --git a/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py b/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py new file mode 100644 index 0000000000..5d5222e796 --- /dev/null +++ b/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py @@ -0,0 +1,30 @@ +"""Qwen3 model preset configurations.""" + +backbone_presets = { + "qwen3_moe_30b_a3b_en": { + "metadata": { + "description": ( + "Mixture-of-Experts (MoE) model has 30.5 billion total parameters" + " with 3.3 billion activated, built on 48 layers, and utilizes 32 query " + "and 4 key/value attention heads with 128 experts (8 active)" + "efficiency and fast inference on resource-constrained devices." + ), + "params": 30532122624, + "path": "qwen3_moe", + }, + "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_30b_a3b_en/2", + }, + "qweqwen3_moe_235b_a22b_en": { + "metadata": { + "description": ( + "Mixture-of-Experts (MoE) model has 235 billion total parameters," + " with 22 billion activated, built on 94 layers, and utilizes 64 query " + "and 4 key/value attention heads with 128 experts (8 active)." 
+ ), + "params": 235093634560, + "path": "qwen3_moe", + }, + "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_235b_a22b_en/1", + }, + +} From d105983e47a94f71708761365a8172f01a857cab Mon Sep 17 00:00:00 2001 From: Laxma Reddy Patlolla Date: Wed, 8 Oct 2025 16:30:27 -0700 Subject: [PATCH 2/4] Update keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py b/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py index 5d5222e796..54004af2e9 100644 --- a/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +++ b/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py @@ -14,7 +14,7 @@ }, "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_30b_a3b_en/2", }, - "qweqwen3_moe_235b_a22b_en": { + "qwen3_moe_235b_a22b_en": { "metadata": { "description": ( "Mixture-of-Experts (MoE) model has 235 billion total parameters," From 99da1688d4f853c076f45b2c1f23ab987ba5880e Mon Sep 17 00:00:00 2001 From: Laxma Reddy Patlolla Date: Wed, 8 Oct 2025 16:30:41 -0700 Subject: [PATCH 3/4] Update keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py b/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py index 54004af2e9..d08db2eb8a 100644 --- a/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +++ b/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py @@ -6,8 +6,7 @@ "description": ( "Mixture-of-Experts (MoE) model has 30.5 billion total parameters" " with 3.3 billion activated, built on 48 layers, and utilizes 32 query " - "and 4 key/value attention heads with 128 experts (8 active)" - "efficiency and fast inference on resource-constrained devices." + "and 4 key/value attention heads with 128 experts (8 active), enabling efficiency and fast inference on resource-constrained devices." ), "params": 30532122624, "path": "qwen3_moe", From 0962c14be9363d22d2a3bf0124f0f17631cf5249 Mon Sep 17 00:00:00 2001 From: Laxma Reddy Patlolla Date: Wed, 8 Oct 2025 17:01:52 -0700 Subject: [PATCH 4/4] Fix format issues --- .../src/models/qwen3_moe/qwen3_moe_presets.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py b/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py index d08db2eb8a..5f3b8c1393 100644 --- a/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +++ b/keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py @@ -1,12 +1,13 @@ -"""Qwen3 model preset configurations.""" +"""Qwen3 MoE model preset configurations.""" backbone_presets = { "qwen3_moe_30b_a3b_en": { "metadata": { "description": ( - "Mixture-of-Experts (MoE) model has 30.5 billion total parameters" - " with 3.3 billion activated, built on 48 layers, and utilizes 32 query " - "and 4 key/value attention heads with 128 experts (8 active), enabling efficiency and fast inference on resource-constrained devices." + " Mixture-of-Experts (MoE) model has 30.5 billion total" + " parameters with 3.3 billion activated, built on 48 layers" + " and utilizes 32 query and 4 key/value attention heads" + " with 128 experts (8 active)." 
), "params": 30532122624, "path": "qwen3_moe", @@ -16,14 +17,14 @@ "qwen3_moe_235b_a22b_en": { "metadata": { "description": ( - "Mixture-of-Experts (MoE) model has 235 billion total parameters," - " with 22 billion activated, built on 94 layers, and utilizes 64 query " - "and 4 key/value attention heads with 128 experts (8 active)." + " Mixture-of-Experts (MoE) model has 235 billion" + " total parameters with 22 billion activated, built on 94" + " layers and utilizes 64 query and 4 key/value attention heads" + " with 128 experts (8 active)." ), "params": 235093634560, "path": "qwen3_moe", }, "kaggle_handle": "kaggle://keras/qwen-3-moe/keras/qwen3_moe_235b_a22b_en/1", }, - }
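
Usage sketch: with the presets registered by the __init__.py above via
register_presets(backbone_presets, Qwen3MoeBackbone), either model loads
through the standard from_preset flow. A minimal sketch, assuming
Qwen3MoeBackbone is exported under keras_hub.models the way the other
backbones are:

    import keras_hub

    # The name matches the key registered in qwen3_moe_presets.py above;
    # from_preset resolves it, fetches the weights from the recorded
    # kaggle_handle, and instantiates the backbone with the stored config.
    backbone = keras_hub.models.Qwen3MoeBackbone.from_preset(
        "qwen3_moe_30b_a3b_en"
    )

The generic keras_hub.models.Backbone.from_preset("qwen3_moe_30b_a3b_en")
route should resolve to the same class, since register_presets ties each
preset name to Qwen3MoeBackbone.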