Draft
Changes from all commits (60 commits)
9705fba  [cpu][perf] Accelerate unquantized-linear for AArch64 through oneDNN/… (fadara01, Oct 4, 2025)
ea507c3  [V1] [Hybrid] Mamba2 Automatic Prefix Caching (#25752) (s3woz, Oct 4, 2025)
d3d649e  Support expert parallel in Transformers backend (#26162) (hmellor, Oct 4, 2025)
44ea851  [Model] Support nested structures for TensorSchema (#26212) (DarkLight1337, Oct 4, 2025)
736fbf4  [Misc] Require `merge_by_field_config` argument (#26214) (DarkLight1337, Oct 4, 2025)
7c2e91c  [Misc] Remove unused `executor.apply_model` (#26215) (DarkLight1337, Oct 4, 2025)
7d6b033  [CI Failure] fix_test_auto_prefix_cache_support (#26053) (hl475, Oct 4, 2025)
1838cd4  Revert "Add batch invariant kernel override for FlashInfer backend [2… (DarkLight1337, Oct 4, 2025)
d0df145  Add Olmo 3 reasoning parser (#26054) (soldni, Oct 4, 2025)
f05fea1  [Core] Enable decode of context length equal to max model length (#26… (yannicks1, Oct 4, 2025)
2a6dc67  [Bugfix] Fix `_reqs_to_process` leak on abort (#26012) (NickLucche, Oct 4, 2025)
4570535  [Model] CLIP Embedding Support (#26010) (DarkLight1337, Oct 4, 2025)
86ee949  Fix tensor device and dtype placement in Qwen2VL model (#26219) (yuafng, Oct 4, 2025)
ed3aeb2  [V1] [Hybrid] Remove code to override default CUDA graph configuratio… (tdoublep, Oct 4, 2025)
5c057e0  [CPU] Refine batch reorder of CPU attention backend (#26096) (bigPYJ1151, Oct 4, 2025)
a42d2df  [Frontend] Cache chat template kwargs resolution (#26227) (Isotr0py, Oct 4, 2025)
119f006  [Renderer] Clean up renderer code (#26216) (DarkLight1337, Oct 4, 2025)
59a85c3  [Model] Use `merge_by_field_config` for MM models (H-L) (#26230) (DarkLight1337, Oct 5, 2025)
78c1d5b  [Easy] Add str repr for IterationStats (#26232) (22quinn, Oct 5, 2025)
a964e5e  [Bugfix] Allow `--skip-tokenizer-init` with `echo and return_token_id… (DarkLight1337, Oct 5, 2025)
e0986ea  Add documentation for granite 4 tool calling (#26175) (maxdebayser, Oct 5, 2025)
201c971  [Perf][Easy] Early stop in request_block_hasher (#26112) (Jialin, Oct 5, 2025)
432e1cb  [Bugfix]: Assertion error when using FlashInfer backend (#25933) (simondanielsson, Oct 5, 2025)
b7e8e4e  [Bugfix] Always apply MM processor even when no MM items are passed (… (DarkLight1337, Oct 5, 2025)
3303cfb  [Bugfix][Hardware][RISC-V] Limit supported dtypes to float32 to avoid… (ihb2032, Oct 5, 2025)
17edd8a  [Platform][Kernel] platform-specific kernel loading (#25823) (ILikeIneine, Oct 5, 2025)
d6953be  Convert formatting to use `ruff` instead of `yapf` + `isort` (#26247) (hmellor, Oct 5, 2025)
4e256ca  Remove all references to `yapf` as it's no longer used (#26251) (hmellor, Oct 5, 2025)
557b2e9  Remove all cases of `fmt: on/off` (#26253) (hmellor, Oct 5, 2025)
5f31753  fix(tests): Resolve late binding of loop variable in assert message l… (ihb2032, Oct 5, 2025)
1c0c682  Fix per file ruff ignores related to typing (#26254) (hmellor, Oct 5, 2025)
512b8af  Update `ruff` pre-commit hooks version (#26255) (hmellor, Oct 5, 2025)
9c3c21c  [CI] fix mamba kernel test (#26250) (ZJY0516, Oct 5, 2025)
6b6e987  [NVIDIA] flashinfer TRTLLM attention prefill token limit (#25998) (jasonlizhengjian, Oct 5, 2025)
b893d66  Fix per file ruff ignores related to simplification (#26259) (hmellor, Oct 5, 2025)
60bc25e  [CI] Add Blackwell LM Eval Small Models test to nightly (#26052) (mgoin, Oct 5, 2025)
f509a20  [DOC] Update production-stack.md (#26177) (elieserr, Oct 5, 2025)
d3c8429  [CI] Add comment about the single cudagraph capture size that is used… (tdoublep, Oct 6, 2025)
778f554  [V1] [Hybrid] Some additional clean-up in Mamba2 prefix caching (#26222) (tdoublep, Oct 6, 2025)
59b4776  [Doc] Edited minor typo (#26266) (orangeng, Oct 6, 2025)
4be7d7c  [MISC] Add heheda12345 to CODEOWNERS of vllm/config/cache.py (#26270) (heheda12345, Oct 6, 2025)
91ac7f7  [CI][gpt-oss] Enable python tool tests in CI (#24315) (wuhang2014, Oct 6, 2025)
6c04638  Fix per file ruff ignores related to line length (#26262) (hmellor, Oct 6, 2025)
039b6ba  Bump actions/stale from 10.0.0 to 10.1.0 (#26272) (dependabot[bot], Oct 6, 2025)
7c2ec0f  [Benchmarking] Add disable_shuffle option for dataset loading (#26258) (ymoslem, Oct 6, 2025)
43c146c  [Misc] Clean up unnecessary E501 ignore (#26274) (ywang96, Oct 6, 2025)
59f30d0  [Docs] Edit HF Inference Endpoints documentation (#26275) (ariG23498, Oct 6, 2025)
77c95f7  [Doc] add KAITO to integrations (#25521) (abhisheksheth28, Oct 6, 2025)
391612e  [Frontend] Consolidate tokenizer init code (#26276) (DarkLight1337, Oct 6, 2025)
19a00eb  [Model] Use `merge_by_field_config` for MM models (Llava family) (#26… (DarkLight1337, Oct 6, 2025)
0340f45  Support expert parallel load balancing in Transformers backend (#26287) (hmellor, Oct 6, 2025)
ab5e7d9  [Bugfix] Fix mrope in Transformers Backend (#26087) (zucchini-nlp, Oct 6, 2025)
fc67969  Fix `DotsOCR` tensor type (#26281) (what-in-the-nim, Oct 6, 2025)
cb5c553  Add Eagle3 config support for auxiliary hidden state layer IDs (rahul-tuli, Sep 30, 2025)
07e7c78  Document Eagle3 auxiliary layer default selection in Llama (rahul-tuli, Sep 30, 2025)
58dfcf6  Implement SupportsEagle3 interface for Llama4 multimodal models (rahul-tuli, Sep 30, 2025)
730f04d  Override get_input_embeddings in Eagle3 to process text-only inputs (rahul-tuli, Sep 30, 2025)
06c6c93  Add dynamic Eagle3 auxiliary layer configuration from speculative config (rahul-tuli, Sep 30, 2025)
1c1d679  Review comments (rahul-tuli, Oct 3, 2025)
1037b36  Use get_input_embeddings (rahul-tuli, Oct 3, 2025)
@@ -368,7 +368,7 @@ def parse_client_command(cmd: str) -> dict[str, Any]:
# The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",
# we want to turn it into "8xGPUTYPE"
df["GPU"] = df["GPU"].apply(
lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}"
lambda x: f"{len(x.splitlines())}x{x.splitlines()[0]}"
)

# get markdown tables
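For context, a minimal standalone sketch of the transformation this hunk performs, using made-up sample data; `str.splitlines()` also sidesteps putting a `\n` escape inside the f-string expression, which Python versions before 3.12 reject:

```python
# Minimal sketch with made-up data: collapse "GPUTYPE\nGPUTYPE\n..." into "NxGPUTYPE".
gpu_column_value = "H100\nH100\nH100\nH100"


def summarize_gpus(x: str) -> str:
    parts = x.splitlines()
    return f"{len(parts)}x{parts[0]}"


print(summarize_gpus(gpu_column_value))  # -> 4xH100
```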
46 changes: 0 additions & 46 deletions .buildkite/pyproject.toml

This file was deleted.

15 changes: 13 additions & 2 deletions .buildkite/test-pipeline.yaml
@@ -477,6 +477,7 @@ steps:
source_file_dependencies:
- csrc/mamba/
- tests/kernels/mamba
- vllm/model_executor/layers/mamba/ops
commands:
- pytest -v -s kernels/mamba

@@ -834,11 +835,11 @@ steps:
- pytest -v -s tests/kernels/moe/test_flashinfer.py
- pytest -v -s tests/compile/test_silu_mul_quant_fusion.py

- label: GPT-OSS Eval (Blackwell)
- label: Blackwell GPT-OSS Eval
timeout_in_minutes: 60
working_dir: "/vllm-workspace/"
gpu: b200
optional: true # disable while debugging
optional: true # run on nightlies
source_file_dependencies:
- tests/evals/gpt_oss
- vllm/model_executor/models/gpt_oss.py
@@ -865,6 +866,16 @@
commands:
- pytest -s -v tests/quantization/test_blackwell_moe.py

- label: Blackwell LM Eval Small Models
timeout_in_minutes: 75
gpu: b200
optional: true # run on nightlies
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
commands:
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt --tp-size=1

##### 1 GPU test #####
##### multi gpus test #####

1 change: 1 addition & 0 deletions .github/CODEOWNERS
@@ -23,6 +23,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
# Any change to the VllmConfig changes can have a large user-facing impact,
# so spam a lot of people
/vllm/config @simon-mo @WoosukKwon @youkaichao @robertgshaw2-redhat @mgoin @tlrmchlsmth @houseroad @hmellor @yewentao256 @ProExpertProg
/vllm/config/cache.py @simon-mo @WoosukKwon @youkaichao @robertgshaw2-redhat @mgoin @tlrmchlsmth @houseroad @hmellor @yewentao256 @ProExpertProg @heheda12345

# vLLM V1
/vllm/v1 @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat
2 changes: 1 addition & 1 deletion .github/workflows/stale.yml
@@ -13,7 +13,7 @@ jobs:
actions: write
runs-on: ubuntu-latest
steps:
- uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
with:
# Increasing this value ensures that changes to this workflow
# propagate to all issues and PRs in days rather than months
16 changes: 2 additions & 14 deletions .pre-commit-config.yaml
@@ -6,28 +6,16 @@ default_stages:
- manual # Run in CI
exclude: 'vllm/third_party/.*'
repos:
- repo: https://github.com/google/yapf
rev: v0.43.0
hooks:
- id: yapf
args: [--in-place, --verbose]
# Keep the same list from yapfignore here to avoid yapf failing without any inputs
exclude: '(.buildkite|benchmarks|build|examples)/.*'
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.7
rev: v0.13.3
hooks:
- id: ruff
- id: ruff-check
args: [--output-format, github, --fix]
- id: ruff-format
files: ^(.buildkite|benchmarks|examples)/.*
- repo: https://github.com/crate-ci/typos
rev: v1.35.5
hooks:
- id: typos
- repo: https://github.com/PyCQA/isort
rev: 6.0.1
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v20.1.3
hooks:
2 changes: 1 addition & 1 deletion benchmarks/benchmark_block_pool.py
@@ -2,9 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import gc

from benchmark_utils import TimeCollector
from tabulate import tabulate

from benchmark_utils import TimeCollector
from vllm.utils import FlexibleArgumentParser
from vllm.v1.core.block_pool import BlockPool

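In this diff and the next one, `from benchmark_utils import TimeCollector` appears twice because the line moves between import groups; assuming the second occurrence is the added line, the ruff-managed ordering places it with the local and vLLM imports rather than with third-party packages. A sketch of the resulting block:

```python
# Sketch of the resulting import grouping (assumes the second occurrence of the
# benchmark_utils import in the diff is the added line): stdlib first, then
# third-party, then local/first-party modules.
import gc  # stdlib

from tabulate import tabulate  # third-party

from benchmark_utils import TimeCollector  # local benchmark helper
from vllm.utils import FlexibleArgumentParser  # first-party (vLLM)
from vllm.v1.core.block_pool import BlockPool
```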
4 changes: 2 additions & 2 deletions benchmarks/benchmark_ngram_proposer.py
@@ -5,9 +5,9 @@
from unittest import mock

import numpy as np
from benchmark_utils import TimeCollector
from tabulate import tabulate

from benchmark_utils import TimeCollector
from vllm.config import (
CacheConfig,
DeviceConfig,
@@ -164,7 +164,7 @@ def invoke_main() -> None:
)
parser.add_argument(
"--batched", action="store_true", help="consider time to prepare batch"
) # noqa: E501
)
parser.add_argument(
"--num-iteration",
type=int,
9 changes: 4 additions & 5 deletions benchmarks/benchmark_serving_structured_output.py
@@ -37,14 +37,13 @@
import datasets
import numpy as np
import pandas as pd
from tqdm.asyncio import tqdm
from transformers import PreTrainedTokenizerBase

from backend_request_func import (
ASYNC_REQUEST_FUNCS,
RequestFuncInput,
RequestFuncOutput,
)
from tqdm.asyncio import tqdm
from transformers import PreTrainedTokenizerBase

try:
from vllm.transformers_utils.tokenizer import get_tokenizer
@@ -910,13 +909,13 @@ def create_argument_parser():
parser.add_argument(
"--tokenizer",
type=str,
help="Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501
help="Name or path of the tokenizer, if not using the default tokenizer.",
)
parser.add_argument(
"--tokenizer-mode",
type=str,
default="auto",
help="Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501
help="Name or path of the tokenizer, if not using the default tokenizer.",
)
parser.add_argument(
"--num-prompts",