diff --git a/.gitlab/tests.yml b/.gitlab/tests.yml
index 7c862c778..ebba0bad4 100644
--- a/.gitlab/tests.yml
+++ b/.gitlab/tests.yml
@@ -54,20 +54,12 @@ example-torch:
   timeout: 30m
   parallel:
     matrix:
-      - EXAMPLE: [llm_distill, llm_sparsity, speculative_decoding]
+      - EXAMPLE: [llm_distill, llm_qat, llm_sparsity, speculative_decoding]
   script:
     - pip install ".[hf,dev-test]"
     - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
     - pytest -s tests/examples/$EXAMPLE
 
-# TODO: Fix llm_qat test hang in GitLab CI
-example-failing:
-  extends: example-torch
-  allow_failure: true
-  parallel:
-    matrix:
-      - EXAMPLE: [llm_qat]
-
 example-trtllm:
   extends: example-torch
   timeout: 60m
diff --git a/docs/source/guides/7_nas.rst b/docs/source/guides/7_nas.rst
index 888039fcd..98d2b9729 100644
--- a/docs/source/guides/7_nas.rst
+++ b/docs/source/guides/7_nas.rst
@@ -635,3 +635,12 @@ The difference between NAS and pruning is summarized below.
        increased training time.
      - May provide similar performance to NAS in particular applications, however, usually
        exhibits worse performance due to the limited search space and training time.
+
+
+[Advanced] Adding a new NAS/Prune Algorithm
+===========================================
+
+* Please refer to this `template `_
+  for adding a new NAS algorithm.
+* Please refer to `mcore_minitron.py `_
+  for an actual example of adding Minitron Pruning algorithm.
\ No newline at end of file
diff --git a/modelopt/torch/__init__.py b/modelopt/torch/__init__.py
index d2a8e7eef..b9d43c5de 100644
--- a/modelopt/torch/__init__.py
+++ b/modelopt/torch/__init__.py
@@ -34,7 +34,7 @@
     if not (_Version("4.48") <= _Version(_transformers_version) < _Version("5.0")):
         _warnings.warn(
-            f"transformers version {_transformers_version} is incompatible with nvidia-modelopt and may cause issues. "
+            f"transformers version {_transformers_version} is not tested with nvidia-modelopt and may cause issues. "
             "Please install recommended version with `pip install nvidia-modelopt[hf]` if working with HF models.",
         )
 except ImportError:
diff --git a/modelopt/torch/opt/plugins/__init__.py b/modelopt/torch/opt/plugins/__init__.py
index 79c4367fb..b86ef1eb7 100644
--- a/modelopt/torch/opt/plugins/__init__.py
+++ b/modelopt/torch/opt/plugins/__init__.py
@@ -19,9 +19,6 @@
 
 from .huggingface import *
 
-with import_plugin("megatron core model config"):
-    from .megatron_model_config import *
-
 with import_plugin("megatron core dist checkpointing"):
     from .mcore_dist_checkpointing import *
 
diff --git a/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py b/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py
index 70ca72f17..d08209e72 100644
--- a/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py
+++ b/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py
@@ -173,7 +173,7 @@ def _test_mamba_parameter_sorting(rank, size):
     prompt_tokens = torch.randint(0, vocab_size, (batch_size, max_sequence_length)).cuda()
     y1 = run_mcore_inference(model, prompt_tokens)
 
-    dynamic_space.sort_parameters()
+    mtn.utils.sort_parameters(model)
 
     # check if all mamba_num_heads, mamba_head_dim, hidden_size have been sorted
     sortable_per_pp = [