Commit 6c04638

Fix per file ruff ignores related to line length (#26262)
Signed-off-by: Harry Mellor <[email protected]>
1 parent 91ac7f7 commit 6c04638
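
This commit drops the temporary per-file E501 ignores from pyproject.toml and fixes the affected files directly: stale "# noqa: E501" markers are removed from lines that already fit, long trailing comments are moved onto their own line, and an inline "# noqa: E501" is added only where a line cannot reasonably be shortened (the CUTLASS kernel-schedule names in csrc/cutlass_extensions). As a rough, hedged sketch of those two patterns (illustrative only, not code from this commit; SCHEDULE_TAGS is a hypothetical name):

import argparse

parser = argparse.ArgumentParser()

# Pattern 1: reformat so the line fits within the length limit and no
# suppression is needed, e.g. one argument per line and any long comment
# placed above the call instead of trailing it.
parser.add_argument(
    "--batched",
    action="store_true",
    help="consider time to prepare batch",
)

# Pattern 2: keep a narrowly scoped inline ignore on the single line that
# cannot reasonably be shortened, instead of ignoring E501 for the whole file.
SCHEDULE_TAGS = {  # hypothetical mapping, mirroring the CUTLASS extension change below
    "TmaWarpSpecializedCooperative": "cutlass::gemm::KernelTmaWarpSpecializedCooperative",  # noqa: E501
}

Pattern 1 accounts for most of the hunks below; csrc/cutlass_extensions/vllm_cutlass_library_extension.py keeps inline ignores because the fully qualified C++ schedule names themselves exceed the line-length limit.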

65 files changed: +301, -291 lines


benchmarks/benchmark_ngram_proposer.py

Lines changed: 1 addition & 1 deletion
@@ -164,7 +164,7 @@ def invoke_main() -> None:
     )
     parser.add_argument(
         "--batched", action="store_true", help="consider time to prepare batch"
-    )  # noqa: E501
+    )
     parser.add_argument(
         "--num-iteration",
         type=int,

benchmarks/benchmark_serving_structured_output.py

Lines changed: 2 additions & 2 deletions
@@ -909,13 +909,13 @@ def create_argument_parser():
     parser.add_argument(
         "--tokenizer",
         type=str,
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
     )
     parser.add_argument(
         "--tokenizer-mode",
         type=str,
         default="auto",
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
     )
     parser.add_argument(
         "--num-prompts",

csrc/cutlass_extensions/vllm_cutlass_library_extension.py

Lines changed: 3 additions & 3 deletions
@@ -72,8 +72,8 @@ class MixedInputKernelScheduleType(enum.Enum):
 ] = {
     **KernelScheduleTag,  # type: ignore
     **{
-        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",
-        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",
-        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",
+        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",  # noqa: E501
     },
 }

examples/offline_inference/vision_language_pooling.py

Lines changed: 1 addition & 1 deletion
@@ -113,7 +113,7 @@ def run_e5_v(query: Query) -> ModelRequestData:
 def _get_vlm2vec_prompt_image(query: Query, image_token: str):
     if query["modality"] == "text":
         text = query["text"]
-        prompt = f"Find me an everyday image that matches the given caption: {text}"  # noqa: E501
+        prompt = f"Find me an everyday image that matches the given caption: {text}"
         image = None
     elif query["modality"] == "image":
         prompt = f"{image_token} Find a day-to-day image that looks similar to the provided image."  # noqa: E501

examples/online_serving/disaggregated_serving/disagg_proxy_demo.py

Lines changed: 2 additions & 2 deletions
@@ -203,9 +203,9 @@ async def forward_request(self, url, data, use_chunked=True):
                 async with session.post(
                     url=url, json=data, headers=headers
                 ) as response:
-                    if 200 <= response.status < 300 or 400 <= response.status < 500:  # noqa: E501
+                    if 200 <= response.status < 300 or 400 <= response.status < 500:
                         if use_chunked:
-                            async for chunk_bytes in response.content.iter_chunked(  # noqa: E501
+                            async for chunk_bytes in response.content.iter_chunked(
                                 1024
                             ):
                                 yield chunk_bytes

pyproject.toml

Lines changed: 0 additions & 46 deletions
@@ -56,52 +56,6 @@ include = ["vllm*"]
 "vllm/third_party/**" = ["ALL"]
 "vllm/version.py" = ["F401"]
 "vllm/_version.py" = ["ALL"]
-# TEMPORARY! These ignores will be fixed forward
-## Line length violations
-"csrc/cutlass_extensions/vllm_cutlass_library_extension.py" = ["E501"]
-"tests/compile/piecewise/test_simple.py" = ["E501"]
-"tests/compile/piecewise/test_toy_llama.py" = ["E501", "B023"]
-"tests/entrypoints/conftest.py" = ["E501"]
-"tests/entrypoints/openai/test_audio.py" = ["E501"]
-"tests/entrypoints/openai/test_chat.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_template.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_with_tool_reasoning.py" = ["E501"]
-"tests/entrypoints/openai/test_completion_with_function_calling.py" = ["E501"]
-"tests/entrypoints/openai/test_video.py" = ["E501"]
-"tests/entrypoints/openai/test_vision.py" = ["E501"]
-"tests/entrypoints/test_chat_utils.py" = ["E501"]
-"tests/kernels/moe/modular_kernel_tools/common.py" = ["E501"]
-"tests/models/language/generation/test_gemma.py" = ["E501"]
-"tests/models/language/generation/test_mistral.py" = ["E501"]
-"tests/models/multimodal/generation/test_ultravox.py" = ["E501"]
-"tests/models/multimodal/generation/test_voxtral.py" = ["E501"]
-"tests/models/multimodal/generation/vlm_utils/custom_inputs.py" = ["E501"]
-"tests/tool_use/test_tool_choice_required.py" = ["E501"]
-"tests/v1/attention/utils.py" = ["E501"]
-"tests/v1/entrypoints/openai/responses/test_image.py" = ["E501"]
-"tests/v1/kv_connector/nixl_integration/test_accuracy.py" = ["E501"]
-"tests/v1/kv_connector/unit/test_offloading_connector.py" = ["E501"]
-"tests/v1/logits_processors/test_custom_offline.py" = ["E501"]
-"vllm/attention/ops/pallas_kv_cache_update.py" = ["E501"]
-"vllm/compilation/collective_fusion.py" = ["E501"]
-"vllm/compilation/wrapper.py" = ["E501"]
-"vllm/config/vllm.py" = ["E501"]
-"vllm/distributed/device_communicators/all2all.py" = ["E501"]
-"vllm/entrypoints/openai/protocol.py" = ["E501"]
-"vllm/lora/layers/vocal_parallel_embedding.py" = ["E501"]
-"vllm/model_executor/model_loader/bitsandbytes_loader.py" = ["E501"]
-"vllm/model_executor/models/bailing_moe.py" = ["E501"]
-"vllm/model_executor/models/hyperclovax_vision.py" = ["E501"]
-"vllm/model_executor/models/llama4_eagle.py" = ["E501"]
-"vllm/model_executor/models/longcat_flash_mtp.py" = ["E501"]
-"vllm/model_executor/models/phi4mm.py" = ["E501"]
-"vllm/model_executor/models/qwen3_next.py" = ["E501"]
-"vllm/model_executor/layers/quantization/ptpc_fp8.py" = ["E501"]
-"vllm/v1/attention/backends/mla/common.py" = ["E501"]
-"vllm/v1/engine/utils.py" = ["E501"]
-"vllm/v1/utils.py" = ["E501"]
-"vllm/v1/worker/gpu_model_runner.py" = ["E501"]
-# End of temporary ignores
 
 [tool.ruff.lint]
 select = [

tests/compile/piecewise/test_simple.py

Lines changed: 17 additions & 11 deletions
@@ -132,10 +132,14 @@ def test_simple_piecewise_compile(use_inductor):
         splitting_ops=["silly.attention"],
         use_inductor_graph_partition=False,
         use_inductor=use_inductor,
-        expected_num_piecewise_graphs_seen=5,  # 2 * num_layers + 1
-        expected_num_piecewise_capturable_graphs_seen=3,  # 1 + num_layers
-        expected_num_backend_compilations=3,  # num_piecewise_capturable_graphs_seen
-        expected_num_cudagraph_captured=6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # 2 * num_layers + 1
+        expected_num_piecewise_graphs_seen=5,
+        # 1 + num_layers
+        expected_num_piecewise_capturable_graphs_seen=3,
+        # num_piecewise_capturable_graphs_seen
+        expected_num_backend_compilations=3,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        expected_num_cudagraph_captured=6,
     )


@@ -147,14 +151,16 @@ def test_simple_inductor_graph_partition(splitting_ops):
         pytest.skip("inductor graph partition is only available in PyTorch 2.9+")

     _run_simple_model(
-        # inductor graph partition automatically resets splitting_ops
-        # to be an empty list
+        # Inductor graph partition automatically resets splitting_ops to an empty list
         splitting_ops=splitting_ops,
         use_inductor_graph_partition=True,
         use_inductor=True,
-        expected_num_piecewise_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_piecewise_capturable_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_backend_compilations=1,  # since not splitting at fx graph level
-        expected_num_cudagraph_captured=6,  # inductor graph partition still captures 6
-        # graph, same as fx graph partition.
+        # Since not splitting at fx graph level
+        expected_num_piecewise_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_piecewise_capturable_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_backend_compilations=1,
+        # Inductor graph partition still captures 6 graph, same as fx graph partition
+        expected_num_cudagraph_captured=6,
     )

tests/compile/piecewise/test_toy_llama.py

Lines changed: 9 additions & 5 deletions
@@ -367,11 +367,14 @@ def test_toy_llama(use_inductor: bool):
         kwargs = {"num_eager_compiles": 1, "num_inductor_compiles": 0}

     with compilation_counter.expect(
-        num_graphs_seen=1,  # one graph for the model
+        # One graph for the model
+        num_graphs_seen=1,
         num_piecewise_graphs_seen=1,
         num_piecewise_capturable_graphs_seen=1,
-        num_backend_compilations=1,  # num_piecewise_capturable_graphs_seen
-        num_cudagraph_captured=2,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # num_piecewise_capturable_graphs_seen
+        num_backend_compilations=1,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        num_cudagraph_captured=2,
         **kwargs,
     ):
         outputs.append(
@@ -478,9 +481,10 @@ def benchmark():
             # it is fine here, because we only use the lambda function once.
             runtime = do_bench(
                 lambda: graphs[b][0](  # noqa
-                    input_ids[:b], positions[:b]
+                    input_ids[:b],  # noqa
+                    positions[:b],  # noqa
                 )
-            )  # noqa
+            )
             piecewise_cudagraph_time[b] = runtime
         else:
             runtime = do_bench(lambda: graphs[b][0].replay())  # noqa

tests/compile/test_functionalization.py

Lines changed: 1 addition & 1 deletion
@@ -243,7 +243,7 @@ def test_fix_functionalization(model_class: torch.nn.Module, do_fusion: bool):
     # check if the functionalization pass is applied
     for op in model.ops_in_model(do_fusion):
         find_auto_fn(backend_no_func.graph_post_pass.nodes, op)
-        assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None  # noqa: E501
+        assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None

     # make sure the ops were all de-functionalized
     found = dict()

tests/compile/test_fusion_attn.py

Lines changed: 1 addition & 1 deletion
@@ -565,7 +565,7 @@ def test_attention_quant_pattern(
     elif quant_key.dtype == FP4_DTYPE:
         assert attn_nodes_post[0].kwargs.get("output_block_scale") is not None, (
             "Attention should have output_block_scale after FP4 fusion"
-        )  # noqa: E501
+        )

     # Check that results are close
     torch.testing.assert_close(result_unfused, result_fused_1, atol=1e-2, rtol=1e-2)
