Commit 6c04638

Fix per file ruff ignores related to line length (#26262)
Signed-off-by: Harry Mellor <[email protected]>
1 parent 91ac7f7 commit 6c04638
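
This commit drops the temporary per-file E501 ignores from pyproject.toml and fixes the affected files directly: stale "# noqa: E501" markers are removed from lines that already fit, long trailing comments are moved onto their own line, and an inline "# noqa: E501" is added only where a line cannot reasonably be shortened (the CUTLASS kernel-schedule names in csrc/cutlass_extensions). As a rough, hedged sketch of those two patterns (illustrative only, not code from this commit; SCHEDULE_TAGS is a hypothetical name):

import argparse

parser = argparse.ArgumentParser()

# Pattern 1: reformat so the line fits within the length limit and no
# suppression is needed, e.g. one argument per line and any long comment
# placed above the call instead of trailing it.
parser.add_argument(
    "--batched",
    action="store_true",
    help="consider time to prepare batch",
)

# Pattern 2: keep a narrowly scoped inline ignore on the single line that
# cannot reasonably be shortened, instead of ignoring E501 for the whole file.
SCHEDULE_TAGS = {  # hypothetical mapping, mirroring the CUTLASS extension change below
    "TmaWarpSpecializedCooperative": "cutlass::gemm::KernelTmaWarpSpecializedCooperative",  # noqa: E501
}

Pattern 1 accounts for most of the hunks below; csrc/cutlass_extensions/vllm_cutlass_library_extension.py keeps inline ignores because the fully qualified C++ schedule names themselves exceed the line-length limit.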

65 files changed: +301, -291 lines


benchmarks/benchmark_ngram_proposer.py

Lines changed: 1 addition & 1 deletion
@@ -164,7 +164,7 @@ def invoke_main() -> None:
     )
     parser.add_argument(
         "--batched", action="store_true", help="consider time to prepare batch"
-    )  # noqa: E501
+    )
     parser.add_argument(
         "--num-iteration",
         type=int,

benchmarks/benchmark_serving_structured_output.py

Lines changed: 2 additions & 2 deletions
@@ -909,13 +909,13 @@ def create_argument_parser():
     parser.add_argument(
         "--tokenizer",
         type=str,
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
     )
     parser.add_argument(
         "--tokenizer-mode",
         type=str,
         default="auto",
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
     )
     parser.add_argument(
         "--num-prompts",

csrc/cutlass_extensions/vllm_cutlass_library_extension.py

Lines changed: 3 additions & 3 deletions
@@ -72,8 +72,8 @@ class MixedInputKernelScheduleType(enum.Enum):
 ] = {
     **KernelScheduleTag,  # type: ignore
     **{
-        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",
-        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",
-        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",
+        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",  # noqa: E501
     },
 }

examples/offline_inference/vision_language_pooling.py

Lines changed: 1 addition & 1 deletion
@@ -113,7 +113,7 @@ def run_e5_v(query: Query) -> ModelRequestData:
 def _get_vlm2vec_prompt_image(query: Query, image_token: str):
     if query["modality"] == "text":
         text = query["text"]
-        prompt = f"Find me an everyday image that matches the given caption: {text}"  # noqa: E501
+        prompt = f"Find me an everyday image that matches the given caption: {text}"
         image = None
     elif query["modality"] == "image":
         prompt = f"{image_token} Find a day-to-day image that looks similar to the provided image."  # noqa: E501

examples/online_serving/disaggregated_serving/disagg_proxy_demo.py

Lines changed: 2 additions & 2 deletions
@@ -203,9 +203,9 @@ async def forward_request(self, url, data, use_chunked=True):
                 async with session.post(
                     url=url, json=data, headers=headers
                 ) as response:
-                    if 200 <= response.status < 300 or 400 <= response.status < 500:  # noqa: E501
+                    if 200 <= response.status < 300 or 400 <= response.status < 500:
                         if use_chunked:
-                            async for chunk_bytes in response.content.iter_chunked(  # noqa: E501
+                            async for chunk_bytes in response.content.iter_chunked(
                                 1024
                             ):
                                 yield chunk_bytes

pyproject.toml

Lines changed: 0 additions & 46 deletions
@@ -56,52 +56,6 @@ include = ["vllm*"]
 "vllm/third_party/**" = ["ALL"]
 "vllm/version.py" = ["F401"]
 "vllm/_version.py" = ["ALL"]
-# TEMPORARY! These ignores will be fixed forward
-## Line length violations
-"csrc/cutlass_extensions/vllm_cutlass_library_extension.py" = ["E501"]
-"tests/compile/piecewise/test_simple.py" = ["E501"]
-"tests/compile/piecewise/test_toy_llama.py" = ["E501", "B023"]
-"tests/entrypoints/conftest.py" = ["E501"]
-"tests/entrypoints/openai/test_audio.py" = ["E501"]
-"tests/entrypoints/openai/test_chat.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_template.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_with_tool_reasoning.py" = ["E501"]
-"tests/entrypoints/openai/test_completion_with_function_calling.py" = ["E501"]
-"tests/entrypoints/openai/test_video.py" = ["E501"]
-"tests/entrypoints/openai/test_vision.py" = ["E501"]
-"tests/entrypoints/test_chat_utils.py" = ["E501"]
-"tests/kernels/moe/modular_kernel_tools/common.py" = ["E501"]
-"tests/models/language/generation/test_gemma.py" = ["E501"]
-"tests/models/language/generation/test_mistral.py" = ["E501"]
-"tests/models/multimodal/generation/test_ultravox.py" = ["E501"]
-"tests/models/multimodal/generation/test_voxtral.py" = ["E501"]
-"tests/models/multimodal/generation/vlm_utils/custom_inputs.py" = ["E501"]
-"tests/tool_use/test_tool_choice_required.py" = ["E501"]
-"tests/v1/attention/utils.py" = ["E501"]
-"tests/v1/entrypoints/openai/responses/test_image.py" = ["E501"]
-"tests/v1/kv_connector/nixl_integration/test_accuracy.py" = ["E501"]
-"tests/v1/kv_connector/unit/test_offloading_connector.py" = ["E501"]
-"tests/v1/logits_processors/test_custom_offline.py" = ["E501"]
-"vllm/attention/ops/pallas_kv_cache_update.py" = ["E501"]
-"vllm/compilation/collective_fusion.py" = ["E501"]
-"vllm/compilation/wrapper.py" = ["E501"]
-"vllm/config/vllm.py" = ["E501"]
-"vllm/distributed/device_communicators/all2all.py" = ["E501"]
-"vllm/entrypoints/openai/protocol.py" = ["E501"]
-"vllm/lora/layers/vocal_parallel_embedding.py" = ["E501"]
-"vllm/model_executor/model_loader/bitsandbytes_loader.py" = ["E501"]
-"vllm/model_executor/models/bailing_moe.py" = ["E501"]
-"vllm/model_executor/models/hyperclovax_vision.py" = ["E501"]
-"vllm/model_executor/models/llama4_eagle.py" = ["E501"]
-"vllm/model_executor/models/longcat_flash_mtp.py" = ["E501"]
-"vllm/model_executor/models/phi4mm.py" = ["E501"]
-"vllm/model_executor/models/qwen3_next.py" = ["E501"]
-"vllm/model_executor/layers/quantization/ptpc_fp8.py" = ["E501"]
-"vllm/v1/attention/backends/mla/common.py" = ["E501"]
-"vllm/v1/engine/utils.py" = ["E501"]
-"vllm/v1/utils.py" = ["E501"]
-"vllm/v1/worker/gpu_model_runner.py" = ["E501"]
-# End of temporary ignores
 
 [tool.ruff.lint]
 select = [

tests/compile/piecewise/test_simple.py

Lines changed: 17 additions & 11 deletions
@@ -132,10 +132,14 @@ def test_simple_piecewise_compile(use_inductor):
         splitting_ops=["silly.attention"],
         use_inductor_graph_partition=False,
         use_inductor=use_inductor,
-        expected_num_piecewise_graphs_seen=5,  # 2 * num_layers + 1
-        expected_num_piecewise_capturable_graphs_seen=3,  # 1 + num_layers
-        expected_num_backend_compilations=3,  # num_piecewise_capturable_graphs_seen
-        expected_num_cudagraph_captured=6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # 2 * num_layers + 1
+        expected_num_piecewise_graphs_seen=5,
+        # 1 + num_layers
+        expected_num_piecewise_capturable_graphs_seen=3,
+        # num_piecewise_capturable_graphs_seen
+        expected_num_backend_compilations=3,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        expected_num_cudagraph_captured=6,
     )


@@ -147,14 +151,16 @@ def test_simple_inductor_graph_partition(splitting_ops):
         pytest.skip("inductor graph partition is only available in PyTorch 2.9+")

     _run_simple_model(
-        # inductor graph partition automatically resets splitting_ops
-        # to be an empty list
+        # Inductor graph partition automatically resets splitting_ops to an empty list
         splitting_ops=splitting_ops,
         use_inductor_graph_partition=True,
         use_inductor=True,
-        expected_num_piecewise_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_piecewise_capturable_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_backend_compilations=1,  # since not splitting at fx graph level
-        expected_num_cudagraph_captured=6,  # inductor graph partition still captures 6
-        # graph, same as fx graph partition.
+        # Since not splitting at fx graph level
+        expected_num_piecewise_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_piecewise_capturable_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_backend_compilations=1,
+        # Inductor graph partition still captures 6 graph, same as fx graph partition
+        expected_num_cudagraph_captured=6,
     )

tests/compile/piecewise/test_toy_llama.py

Lines changed: 9 additions & 5 deletions
@@ -367,11 +367,14 @@ def test_toy_llama(use_inductor: bool):
         kwargs = {"num_eager_compiles": 1, "num_inductor_compiles": 0}

     with compilation_counter.expect(
-        num_graphs_seen=1,  # one graph for the model
+        # One graph for the model
+        num_graphs_seen=1,
         num_piecewise_graphs_seen=1,
         num_piecewise_capturable_graphs_seen=1,
-        num_backend_compilations=1,  # num_piecewise_capturable_graphs_seen
-        num_cudagraph_captured=2,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # num_piecewise_capturable_graphs_seen
+        num_backend_compilations=1,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        num_cudagraph_captured=2,
         **kwargs,
     ):
         outputs.append(
@@ -478,9 +481,10 @@ def benchmark():
             # it is fine here, because we only use the lambda function once.
             runtime = do_bench(
                 lambda: graphs[b][0](  # noqa
-                    input_ids[:b], positions[:b]
+                    input_ids[:b],  # noqa
+                    positions[:b],  # noqa
                 )
-            )  # noqa
+            )
             piecewise_cudagraph_time[b] = runtime
         else:
             runtime = do_bench(lambda: graphs[b][0].replay())  # noqa

tests/compile/test_functionalization.py

Lines changed: 1 addition & 1 deletion
@@ -243,7 +243,7 @@ def test_fix_functionalization(model_class: torch.nn.Module, do_fusion: bool):
     # check if the functionalization pass is applied
     for op in model.ops_in_model(do_fusion):
         find_auto_fn(backend_no_func.graph_post_pass.nodes, op)
-        assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None  # noqa: E501
+        assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None

     # make sure the ops were all de-functionalized
     found = dict()

tests/compile/test_fusion_attn.py

Lines changed: 1 addition & 1 deletion
@@ -565,7 +565,7 @@ def test_attention_quant_pattern(
     elif quant_key.dtype == FP4_DTYPE:
         assert attn_nodes_post[0].kwargs.get("output_block_scale") is not None, (
             "Attention should have output_block_scale after FP4 fusion"
-        )  # noqa: E501
+        )

     # Check that results are close
     torch.testing.assert_close(result_unfused, result_fused_1, atol=1e-2, rtol=1e-2)
