Skip to content

Commit 10eb24c

Browse files
zRzRzRzRzRzR, Isotr0py, and luccafong
authored
GLM-4 Update (#20736)
Signed-off-by: zRzRzRzRzRzR <[email protected]>
Signed-off-by: Isotr0py <[email protected]>
Signed-off-by: Lu Fang <[email protected]>
Co-authored-by: Isotr0py <[email protected]>
Co-authored-by: Lu Fang <[email protected]>
1 parent 2e8cbb5 commit 10eb24c

File tree

14 files changed

+2006
-11
lines changed

14 files changed

+2006
-11
lines changed

benchmarks/kernels/benchmark_moe.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,11 @@ def main(args: argparse.Namespace):
576576
topk = config.num_experts_per_tok
577577
intermediate_size = config.intermediate_size
578578
shard_intermediate_size = 2 * intermediate_size // args.tp_size
579-
elif config.architectures[0] in ("DeepseekV3ForCausalLM", "DeepseekV2ForCausalLM"):
579+
elif config.architectures[0] in (
580+
"DeepseekV3ForCausalLM",
581+
"DeepseekV2ForCausalLM",
582+
"Glm4MoeForCausalLM",
583+
):
580584
E = config.n_routed_experts
581585
topk = config.num_experts_per_tok
582586
intermediate_size = config.moe_intermediate_size

benchmarks/kernels/benchmark_moe_permute_unpermute.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ def main(args: argparse.Namespace):
318318
elif (
319319
config.architectures[0] == "DeepseekV3ForCausalLM"
320320
or config.architectures[0] == "DeepseekV2ForCausalLM"
321+
or config.architectures[0] == "Glm4MoeForCausalLM"
321322
):
322323
E = config.n_routed_experts
323324
topk = config.num_experts_per_tok

docs/models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ Specified using `--task generate`.
576576
| `Gemma3ForConditionalGeneration` | Gemma 3 | T + I<sup>+</sup> | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ | ⚠️ |
577577
| `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ |
578578
| `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ |
579+
| `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
579580
| `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ |
580581
| `H2OVLChatModel` | H2OVL | T + I<sup>E+</sup> | `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc. | | ✅︎ | ✅︎ |
581582
| `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |

tests/models/registry.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,9 @@ def check_available_online(
360360
trust_remote_code=True,
361361
hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501
362362
"Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking", min_transformers_version="4.53"), # noqa: E501
363+
"Glm4MoeForCausalLM": _HfExamplesInfo("THUDM/GLM-4.5",
364+
min_transformers_version="4.54",
365+
is_available_online=False), # noqa: E501
363366
"H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
364367
extras={"2b": "h2oai/h2ovl-mississippi-2b"}, # noqa: E501
365368
max_transformers_version="4.48", # noqa: E501
@@ -485,6 +488,10 @@ def check_available_online(
485488
is_available_online=False,
486489
speculative_model="openbmb/MiniCPM-2B-sft-bf16",
487490
tokenizer="openbmb/MiniCPM-2B-sft-bf16"),
491+
"Glm4MoeMTPModel": _HfExamplesInfo("THUDM/GLM-4.5",
492+
speculative_model="THUDM/GLM-4.5",
493+
min_transformers_version="4.54",
494+
is_available_online=False),
488495
"MiMoMTPModel": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL",
489496
trust_remote_code=True,
490497
speculative_model="XiaomiMiMo/MiMo-7B-RL")

0 commit comments

Comments (0)