Commit 27bebcd

Convert examples to ruff-format (#18400)
Signed-off-by: Harry Mellor <[email protected]>
1 parent e7523c2 commit 27bebcd

File tree: 83 files changed (+2535, -2411 lines)


.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ repos:
   - id: ruff
     args: [--output-format, github, --fix]
   - id: ruff-format
-    files: ^(.buildkite|benchmarks)/.*
+    files: ^(.buildkite|benchmarks|examples)/.*
 - repo: https://github.com/codespell-project/codespell
   rev: v2.4.1
   hooks:
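
Extending the `files` pattern to cover `examples/` is what triggers every reformat in the rest of this commit. As a rough composite (illustrative only, not copied from any single file), the recurring transformations ruff-format applies with its default settings look like this:

# Illustrative composite of the style changes seen throughout this commit;
# the values are drawn from the example diffs below.
audio_count = 2

# Before (yapf-style, as removed in the diffs):
#   stop_tokens = ['<|im_end|>', '<|endoftext|>']
#   placeholders = "".join([f"<|audio_{i+1}|>" for i in range(audio_count)])
#   question_per_audio_count = {
#       0: "What is 1+1?",
#       1: "What is recited in the audio?",
#       2: "What sport and what nursery rhyme are referenced?"
#   }

# After (ruff-format defaults, as added in the diffs): double quotes, spaces
# around operators inside f-string expressions, and a trailing comma on the
# last entry of multi-line literals.
stop_tokens = ["<|im_end|>", "<|endoftext|>"]
placeholders = "".join([f"<|audio_{i + 1}|>" for i in range(audio_count)])
question_per_audio_count = {
    0: "What is 1+1?",
    1: "What is recited in the audio?",
    2: "What sport and what nursery rhyme are referenced?",
}
print(stop_tokens, placeholders, question_per_audio_count)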

examples/offline_inference/audio_language.py

Lines changed: 80 additions & 70 deletions
@@ -1,11 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 """
-This example shows how to use vLLM for running offline inference 
+This example shows how to use vLLM for running offline inference
 with the correct prompt format on audio language models.

 For most models, the prompt format should follow corresponding examples
 on HuggingFace model repository.
 """
+
 import os
 from dataclasses import asdict
 from typing import NamedTuple, Optional
@@ -22,7 +23,7 @@
 question_per_audio_count = {
     0: "What is 1+1?",
     1: "What is recited in the audio?",
-    2: "What sport and what nursery rhyme are referenced?"
+    2: "What sport and what nursery rhyme are referenced?",
 }


@@ -72,8 +73,7 @@ def run_granite_speech(question: str, audio_count: int) -> ModelRequestData:
 # MiniCPM-O
 def run_minicpmo(question: str, audio_count: int) -> ModelRequestData:
     model_name = "openbmb/MiniCPM-o-2_6"
-    tokenizer = AutoTokenizer.from_pretrained(model_name,
-                                              trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     engine_args = EngineArgs(
         model=model_name,
         trust_remote_code=True,
@@ -82,19 +82,18 @@ def run_minicpmo(question: str, audio_count: int) -> ModelRequestData:
         limit_mm_per_prompt={"audio": audio_count},
     )

-    stop_tokens = ['<|im_end|>', '<|endoftext|>']
+    stop_tokens = ["<|im_end|>", "<|endoftext|>"]
     stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]

     audio_placeholder = "(<audio>./</audio>)" * audio_count
     audio_chat_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n<|spk_bos|><|spk|><|spk_eos|><|tts_bos|>' }}{% endif %}"  # noqa: E501
-    messages = [{
-        'role': 'user',
-        'content': f'{audio_placeholder}\n{question}'
-    }]
-    prompt = tokenizer.apply_chat_template(messages,
-                                           tokenize=False,
-                                           add_generation_prompt=True,
-                                           chat_template=audio_chat_template)
+    messages = [{"role": "user", "content": f"{audio_placeholder}\n{question}"}]
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+        chat_template=audio_chat_template,
+    )

     return ModelRequestData(
         engine_args=engine_args,
@@ -113,7 +112,7 @@ def run_phi4mm(question: str, audio_count: int) -> ModelRequestData:
     # Since the vision-lora and speech-lora co-exist with the base model,
     # we have to manually specify the path of the lora weights.
     speech_lora_path = os.path.join(model_path, "speech-lora")
-    placeholders = "".join([f"<|audio_{i+1}|>" for i in range(audio_count)])
+    placeholders = "".join([f"<|audio_{i + 1}|>" for i in range(audio_count)])

     prompts = f"<|user|>{placeholders}{question}<|end|><|assistant|>"

@@ -145,15 +144,19 @@ def run_qwen2_audio(question: str, audio_count: int) -> ModelRequestData:
         limit_mm_per_prompt={"audio": audio_count},
     )

-    audio_in_prompt = "".join([
-        f"Audio {idx+1}: "
-        f"<|audio_bos|><|AUDIO|><|audio_eos|>\n" for idx in range(audio_count)
-    ])
+    audio_in_prompt = "".join(
+        [
+            f"Audio {idx + 1}: <|audio_bos|><|AUDIO|><|audio_eos|>\n"
+            for idx in range(audio_count)
+        ]
+    )

-    prompt = ("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
-              "<|im_start|>user\n"
-              f"{audio_in_prompt}{question}<|im_end|>\n"
-              "<|im_start|>assistant\n")
+    prompt = (
+        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+        "<|im_start|>user\n"
+        f"{audio_in_prompt}{question}<|im_end|>\n"
+        "<|im_start|>assistant\n"
+    )

     return ModelRequestData(
         engine_args=engine_args,
@@ -172,19 +175,22 @@ def run_qwen2_5_omni(question: str, audio_count: int):
         limit_mm_per_prompt={"audio": audio_count},
     )

-    audio_in_prompt = "".join([
-        "<|audio_bos|><|AUDIO|><|audio_eos|>\n" for idx in range(audio_count)
-    ])
+    audio_in_prompt = "".join(
+        ["<|audio_bos|><|AUDIO|><|audio_eos|>\n" for idx in range(audio_count)]
+    )

     default_system = (
         "You are Qwen, a virtual human developed by the Qwen Team, Alibaba "
         "Group, capable of perceiving auditory and visual inputs, as well as "
-        "generating text and speech.")
+        "generating text and speech."
+    )

-    prompt = (f"<|im_start|>system\n{default_system}<|im_end|>\n"
-              "<|im_start|>user\n"
-              f"{audio_in_prompt}{question}<|im_end|>\n"
-              "<|im_start|>assistant\n")
+    prompt = (
+        f"<|im_start|>system\n{default_system}<|im_end|>\n"
+        "<|im_start|>user\n"
+        f"{audio_in_prompt}{question}<|im_end|>\n"
+        "<|im_start|>assistant\n"
+    )
     return ModelRequestData(
         engine_args=engine_args,
         prompt=prompt,
@@ -196,13 +202,10 @@ def run_ultravox(question: str, audio_count: int) -> ModelRequestData:
     model_name = "fixie-ai/ultravox-v0_5-llama-3_2-1b"

     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    messages = [{
-        'role': 'user',
-        'content': "<|audio|>\n" * audio_count + question
-    }]
-    prompt = tokenizer.apply_chat_template(messages,
-                                           tokenize=False,
-                                           add_generation_prompt=True)
+    messages = [{"role": "user", "content": "<|audio|>\n" * audio_count + question}]
+    prompt = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )

     engine_args = EngineArgs(
         model=model_name,
@@ -220,8 +223,7 @@ def run_ultravox(question: str, audio_count: int) -> ModelRequestData:

 # Whisper
 def run_whisper(question: str, audio_count: int) -> ModelRequestData:
-    assert audio_count == 1, (
-        "Whisper only support single audio input per prompt")
+    assert audio_count == 1, "Whisper only support single audio input per prompt"
     model_name = "openai/whisper-large-v3-turbo"

     prompt = "<|startoftranscript|>"
@@ -252,27 +254,33 @@ def run_whisper(question: str, audio_count: int) -> ModelRequestData:

 def parse_args():
     parser = FlexibleArgumentParser(
-        description='Demo on using vLLM for offline inference with '
-        'audio language models')
-    parser.add_argument('--model-type',
-                        '-m',
-                        type=str,
-                        default="ultravox",
-                        choices=model_example_map.keys(),
-                        help='Huggingface "model_type".')
-    parser.add_argument('--num-prompts',
-                        type=int,
-                        default=1,
-                        help='Number of prompts to run.')
-    parser.add_argument("--num-audios",
-                        type=int,
-                        default=1,
-                        choices=[0, 1, 2],
-                        help="Number of audio items per prompt.")
-    parser.add_argument("--seed",
-                        type=int,
-                        default=None,
-                        help="Set the seed when initializing `vllm.LLM`.")
+        description="Demo on using vLLM for offline inference with "
+        "audio language models"
+    )
+    parser.add_argument(
+        "--model-type",
+        "-m",
+        type=str,
+        default="ultravox",
+        choices=model_example_map.keys(),
+        help='Huggingface "model_type".',
+    )
+    parser.add_argument(
+        "--num-prompts", type=int, default=1, help="Number of prompts to run."
+    )
+    parser.add_argument(
+        "--num-audios",
+        type=int,
+        default=1,
+        choices=[0, 1, 2],
+        help="Number of audio items per prompt.",
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Set the seed when initializing `vllm.LLM`.",
+    )

     return parser.parse_args()

@@ -283,29 +291,30 @@ def main(args):
         raise ValueError(f"Model type {model} is not supported.")

     audio_count = args.num_audios
-    req_data = model_example_map[model](question_per_audio_count[audio_count],
-                                        audio_count)
+    req_data = model_example_map[model](
+        question_per_audio_count[audio_count], audio_count
+    )

     # Disable other modalities to save memory
     default_limits = {"image": 0, "video": 0, "audio": 0}
     req_data.engine_args.limit_mm_per_prompt = default_limits | dict(
-        req_data.engine_args.limit_mm_per_prompt or {})
+        req_data.engine_args.limit_mm_per_prompt or {}
+    )

     engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
     llm = LLM(**engine_args)

     # We set temperature to 0.2 so that outputs can be different
     # even when all prompts are identical when running batch inference.
-    sampling_params = SamplingParams(temperature=0.2,
-                                     max_tokens=64,
-                                     stop_token_ids=req_data.stop_token_ids)
+    sampling_params = SamplingParams(
+        temperature=0.2, max_tokens=64, stop_token_ids=req_data.stop_token_ids
+    )

     mm_data = {}
     if audio_count > 0:
         mm_data = {
             "audio": [
-                asset.audio_and_sample_rate
-                for asset in audio_assets[:audio_count]
+                asset.audio_and_sample_rate for asset in audio_assets[:audio_count]
             ]
         }

@@ -315,8 +324,9 @@ def main(args):
     # Batch inference
     inputs = [inputs] * args.num_prompts
     # Add LoRA request if applicable
-    lora_request = (req_data.lora_requests *
-                    args.num_prompts if req_data.lora_requests else None)
+    lora_request = (
+        req_data.lora_requests * args.num_prompts if req_data.lora_requests else None
+    )

     outputs = llm.generate(
         inputs,
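
Nothing in this file changes functionally: each reformatted `run_*` helper still returns a `ModelRequestData` that `main()` unpacks into an `LLM` and a generate call. The sketch below condenses that flow; it assumes the definitions from this file (`run_ultravox`, `audio_assets`) and vLLM's dict prompt form, while the real script adds argparse handling, batching, and optional LoRA requests.

# Sketch only: assumes run_ultravox and audio_assets as defined in
# audio_language.py above; not a standalone script.
from dataclasses import asdict

from vllm import LLM, SamplingParams

req_data = run_ultravox("What is recited in the audio?", audio_count=1)
llm = LLM(**asdict(req_data.engine_args))
sampling_params = SamplingParams(
    temperature=0.2, max_tokens=64, stop_token_ids=req_data.stop_token_ids
)
outputs = llm.generate(
    {
        "prompt": req_data.prompt,
        "multi_modal_data": {
            "audio": [asset.audio_and_sample_rate for asset in audio_assets[:1]]
        },
    },
    sampling_params=sampling_params,
)
print(outputs[0].outputs[0].text)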

examples/offline_inference/automatic_prefix_caching.py

Lines changed: 10 additions & 6 deletions
@@ -16,13 +16,16 @@
 Run:
     python examples/offline_inference/automatic_prefix_caching.py
 """
+
 import time

 from vllm import LLM, SamplingParams

 # ruff: noqa: E501
 # A prompt containing a large markdown table. The table is randomly generated by GPT-4.
-LONG_PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n" + """
+LONG_PROMPT = (
+    "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n"
+    + """
 | ID | Name | Age | Occupation | Country | Email | Phone Number | Address |
 |-----|---------------|-----|---------------|---------------|------------------------|----------------|------------------------------|
 | 1 | John Doe | 29 | Engineer | USA | [email protected] | 555-1234 | 123 Elm St, Springfield, IL |
@@ -56,6 +59,7 @@
 | 29 | Amy White | 33 | Musician | New Zealand | [email protected] | 555-5658 | 159 Maple St, Wellington, NZ |
 | 30 | Ben Black | 38 | Chef | Ireland | [email protected] | 555-7870 | 246 Fir St, Waterford, IE |
 """
+)


 def get_generation_time(llm, sampling_params, prompts):
@@ -72,25 +76,25 @@ def get_generation_time(llm, sampling_params, prompts):

 def main():
     # set enable_prefix_caching=True to enable APC
-    llm = LLM(model='lmsys/longchat-13b-16k', enable_prefix_caching=True)
+    llm = LLM(model="lmsys/longchat-13b-16k", enable_prefix_caching=True)

     sampling_params = SamplingParams(temperature=0, max_tokens=100)

     # Querying the age of John Doe
     get_generation_time(
         llm,
         sampling_params,
-        LONG_PROMPT +
-        "Question: what is the age of John Doe? Your answer: The age of John Doe is ",
+        LONG_PROMPT
+        + "Question: what is the age of John Doe? Your answer: The age of John Doe is ",
     )

     # Querying the age of Zack Blue
     # This query will be faster since vllm avoids computing the KV cache of LONG_PROMPT again.
     get_generation_time(
         llm,
         sampling_params,
-        LONG_PROMPT +
-        "Question: what is the age of Zack Blue? Your answer: The age of Zack Blue is ",
+        LONG_PROMPT
+        + "Question: what is the age of Zack Blue? Your answer: The age of Zack Blue is ",
     )

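The `get_generation_time` helper itself is unchanged by this commit, so its body does not appear in the hunks above. For context, a hypothetical timing helper consistent with how it is called might look like the sketch below; this is an assumption, not the file's actual implementation.

# Hypothetical sketch of a timing helper matching the call sites above;
# the real body in the file may differ.
import time

from vllm import LLM, SamplingParams


def get_generation_time(llm: LLM, sampling_params: SamplingParams, prompts):
    # Time one generate() call so the prefix-cache hit on the second query
    # shows up as a shorter wall-clock time.
    start_time = time.time()
    output = llm.generate(prompts, sampling_params=sampling_params)
    end_time = time.time()
    print(f"Output: {output[0].outputs[0].text!r}")
    print(f"Generation time: {end_time - start_time:.2f} seconds.")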

examples/offline_inference/basic/chat.py

Lines changed: 4 additions & 14 deletions
@@ -56,22 +56,12 @@ def print_outputs(outputs):

     # In this script, we demonstrate how to pass input to the chat method:
     conversation = [
-        {
-            "role": "system",
-            "content": "You are a helpful assistant"
-        },
-        {
-            "role": "user",
-            "content": "Hello"
-        },
-        {
-            "role": "assistant",
-            "content": "Hello! How can I assist you today?"
-        },
+        {"role": "system", "content": "You are a helpful assistant"},
+        {"role": "user", "content": "Hello"},
+        {"role": "assistant", "content": "Hello! How can I assist you today?"},
         {
             "role": "user",
-            "content":
-            "Write an essay about the importance of higher education.",
+            "content": "Write an essay about the importance of higher education.",
         },
     ]
     outputs = llm.chat(conversation, sampling_params, use_tqdm=False)
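
The collapsed dict literals are easier to scan but behave identically. Below is a self-contained sketch of calling `LLM.chat` with the same compact conversation format; the model name is an assumed placeholder, since chat.py constructs its `LLM` elsewhere in the file (not shown in this hunk).

# Sketch only: the model id below is an assumed placeholder.
from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct")
sampling_params = SamplingParams(temperature=0.5, max_tokens=128)

conversation = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Write an essay about the importance of higher education."},
]
# llm.chat applies the model's chat template and generates a reply.
outputs = llm.chat(conversation, sampling_params, use_tqdm=False)
print(outputs[0].outputs[0].text)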

examples/offline_inference/basic/classify.py

Lines changed: 8 additions & 7 deletions
@@ -10,9 +10,9 @@ def parse_args():
     parser = FlexibleArgumentParser()
     parser = EngineArgs.add_cli_args(parser)
     # Set example specific arguments
-    parser.set_defaults(model="jason9693/Qwen2.5-1.5B-apeach",
-                        task="classify",
-                        enforce_eager=True)
+    parser.set_defaults(
+        model="jason9693/Qwen2.5-1.5B-apeach", task="classify", enforce_eager=True
+    )
     return parser.parse_args()


@@ -36,10 +36,11 @@ def main(args: Namespace):
     print("\nGenerated Outputs:\n" + "-" * 60)
     for prompt, output in zip(prompts, outputs):
         probs = output.outputs.probs
-        probs_trimmed = ((str(probs[:16])[:-1] +
-                          ", ...]") if len(probs) > 16 else probs)
-        print(f"Prompt: {prompt!r} \n"
-              f"Class Probabilities: {probs_trimmed} (size={len(probs)})")
+        probs_trimmed = (str(probs[:16])[:-1] + ", ...]") if len(probs) > 16 else probs
+        print(
+            f"Prompt: {prompt!r} \n"
+            f"Class Probabilities: {probs_trimmed} (size={len(probs)})"
+        )
         print("-" * 60)

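For context on what the reformatted lines compute: this example classifies prompts and prints the per-class probabilities. A minimal sketch of that flow under the defaults set above is shown below; the prompt strings are placeholders, and the real script reads its engine arguments from the CLI.

# Sketch under the defaults set above; prompt strings are placeholders.
from vllm import LLM

llm = LLM(model="jason9693/Qwen2.5-1.5B-apeach", task="classify", enforce_eager=True)
prompts = [
    "Hello, my name is",
    "The capital of France is",
]
outputs = llm.classify(prompts)
for prompt, output in zip(prompts, outputs):
    probs = output.outputs.probs
    print(f"{prompt!r}: {len(probs)} class probabilities")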
