2 files changed
+2
-2
lines changed- examples/vlm_finetune/qwen3/qwen3_omni_moe_30b_te_deepep.yaml-113
- nemo_automodel/_transformers/registry.py-9
- nemo_automodel/components/attention/utils.py-2
- nemo_automodel/components/checkpoint/stateful_wrappers.py-4
- nemo_automodel/components/datasets/vlm/collate_fns.py+2-8
- nemo_automodel/components/models/qwen3_moe/layers.py+4-8
- nemo_automodel/components/models/qwen3_omni_moe/__init__.py-15
- nemo_automodel/components/models/qwen3_omni_moe/model.py-447
- nemo_automodel/components/models/qwen3_omni_moe/state_dict_adapter.py-104
- nemo_automodel/components/moe/fsdp_mixin.py-7
- nemo_automodel/components/moe/parallelizer.py-17
- nemo_automodel/recipes/llm/benchmark.py-19
- nemo_automodel/recipes/llm/train_ft.py+1-15
- nemo_automodel/recipes/vlm/finetune.py+74-141
- tests/unit_tests/models/deepseek_v3/test_dsv3_layers.py+1-14
- tests/unit_tests/models/glm4_moe/test_glm4_moe_layers.py+1-3
- tests/unit_tests/models/qwen3_moe/test_qwen3_moe_layers.py+1-3
- tests/unit_tests/models/qwen3_next/test_qwen3_next_layers.py+1-3
- tests/unit_tests/models/qwen3_omni_moe/test_qwen3_omni_moe_model.py-261
- tests/unit_tests/models/qwen3_omni_moe/test_qwen3_omni_moe_state_dict_adapter.py-162
- tests/unit_tests/moe/test_fsdp_mixin.py-19
- tests/unit_tests/moe/test_parallelizer.py+1-52
- tests/unit_tests/recipes/test_finetune_vlm_helpers.py+52-127
- tests/unit_tests/recipes/test_train_ft.py+3-102
Submodule Megatron-Bridge updated 44 files
- 3rdparty/Megatron-LM+1-1
- CONTRIBUTING.md+1-1
- docs/training/checkpointing.md-7
- docs/training/peft.md+1
- examples/recipes/qwen3_next/conf/qwen3_next_80b_a3b_finetune_override_example.yaml-48
- examples/recipes/qwen3_next/finetune_qwen3_next_80b_a3b.py-147
- scripts/performance/argument_parser.py+31-16
- scripts/performance/configs/deepseek/deepseek_llm_pretrain.py+12-12
- scripts/performance/configs/gpt_oss/gpt_oss_llm_pretrain.py+8-8
- scripts/performance/configs/llama3/llama3_llm_pretrain.py+22-23
- scripts/performance/configs/llama31/llama31_llm_pretrain.py+11-11
- scripts/performance/configs/nemotronh/nemotronh_llm_pretrain.py+6-6
- scripts/performance/configs/qwen3/qwen3_llm_pretrain.py+22-22
- scripts/performance/perf_plugins.py+23-10
- scripts/performance/setup_experiment.py+8-11
- scripts/performance/utils/helpers.py+24-29
- scripts/performance/utils/utils.py+6-2
- src/megatron/bridge/data/iterator_utils.py-105
- src/megatron/bridge/data/loaders.py+1-1
- src/megatron/bridge/models/conversion/auto_bridge.py+1-27
- src/megatron/bridge/models/gpt_provider.py-3
- src/megatron/bridge/models/mamba/mamba_provider.py-2
- src/megatron/bridge/recipes/llama/__init__.py-22
- src/megatron/bridge/recipes/llama/llama3.py+22-534
- src/megatron/bridge/recipes/qwen/__init__.py-6
- src/megatron/bridge/recipes/qwen/qwen3_moe.py+1-294
- src/megatron/bridge/recipes/qwen/qwen3_next.py+49-277
- src/megatron/bridge/training/checkpointing.py+21-37
- src/megatron/bridge/training/config.py+1-19
- src/megatron/bridge/training/eval.py+4-44
- src/megatron/bridge/training/train.py+59-105
- src/megatron/bridge/training/utils/checkpoint_utils.py+38-154
- tests/functional_tests/training/test_finetune_lora.py-1
- tests/functional_tests/training/test_pretrain_resume.py-2
- tests/functional_tests/utils.py+1-1
- tests/unit_tests/data/test_finetuning.py-143
- tests/unit_tests/data/test_iterator_utils.py-109
- tests/unit_tests/models/test_auto_bridge.py-40
- tests/unit_tests/recipes/test_llama_recipes.py+24-356
- tests/unit_tests/recipes/test_qwen_recipes.py+17-216
- tests/unit_tests/training/test_checkpointing.py+7-26
- tests/unit_tests/training/test_train.py-131
- tests/unit_tests/training/utils/test_checkpoint_utils.py-46
- uv.lock+164-104
0 commit comments