diff --git a/.gitlab/tests.yml b/.gitlab/tests.yml
index 7c862c778..ebba0bad4 100644
--- a/.gitlab/tests.yml
+++ b/.gitlab/tests.yml
@@ -54,20 +54,12 @@ example-torch:
   timeout: 30m
   parallel:
     matrix:
-      - EXAMPLE: [llm_distill, llm_sparsity, speculative_decoding]
+      - EXAMPLE: [llm_distill, llm_qat, llm_sparsity, speculative_decoding]
   script:
     - pip install ".[hf,dev-test]"
     - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
     - pytest -s tests/examples/$EXAMPLE
 
-# TODO: Fix llm_qat test hang in GitLab CI
-example-failing:
-  extends: example-torch
-  allow_failure: true
-  parallel:
-    matrix:
-      - EXAMPLE: [llm_qat]
-
 example-trtllm:
   extends: example-torch
   timeout: 60m
diff --git a/docs/source/guides/7_nas.rst b/docs/source/guides/7_nas.rst
index 888039fcd..98d2b9729 100644
--- a/docs/source/guides/7_nas.rst
+++ b/docs/source/guides/7_nas.rst
@@ -635,3 +635,12 @@ The difference between NAS and pruning is summarized below.
        increased training time.
      - May provide similar performance to NAS in particular applications, however, usually
        exhibits worse performance due to the limited search space and training time.
+
+
+[Advanced] Adding a new NAS/Prune Algorithm
+===========================================
+
+* Please refer to this `template `_
+  for adding a new NAS algorithm.
+* Please refer to `mcore_minitron.py `_
+  for an actual example of adding Minitron Pruning algorithm.
\ No newline at end of file
diff --git a/modelopt/torch/__init__.py b/modelopt/torch/__init__.py
index d2a8e7eef..b9d43c5de 100644
--- a/modelopt/torch/__init__.py
+++ b/modelopt/torch/__init__.py
@@ -34,7 +34,7 @@
     if not (_Version("4.48") <= _Version(_transformers_version) < _Version("5.0")):
         _warnings.warn(
-            f"transformers version {_transformers_version} is incompatible with nvidia-modelopt and may cause issues. "
+            f"transformers version {_transformers_version} is not tested with nvidia-modelopt and may cause issues. "
             "Please install recommended version with `pip install nvidia-modelopt[hf]` if working with HF models.",
         )
 except ImportError:
diff --git a/modelopt/torch/opt/plugins/__init__.py b/modelopt/torch/opt/plugins/__init__.py
index 79c4367fb..b86ef1eb7 100644
--- a/modelopt/torch/opt/plugins/__init__.py
+++ b/modelopt/torch/opt/plugins/__init__.py
@@ -19,9 +19,6 @@
 
 from .huggingface import *
 
-with import_plugin("megatron core model config"):
-    from .megatron_model_config import *
-
 with import_plugin("megatron core dist checkpointing"):
     from .mcore_dist_checkpointing import *
 
diff --git a/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py b/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py
index 70ca72f17..d08209e72 100644
--- a/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py
+++ b/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py
@@ -173,7 +173,7 @@ def _test_mamba_parameter_sorting(rank, size):
     prompt_tokens = torch.randint(0, vocab_size, (batch_size, max_sequence_length)).cuda()
     y1 = run_mcore_inference(model, prompt_tokens)
 
-    dynamic_space.sort_parameters()
+    mtn.utils.sort_parameters(model)
 
     # check if all mamba_num_heads, mamba_head_dim, hidden_size have been sorted
     sortable_per_pp = [