Commit d92c04b

Merge remote-tracking branch 'upstream/main' into upstream_merge_2025_05_29

2 parents: 628db8d + ca2f6b9

File tree: 137 files changed, +4932 −1563 lines

.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 35 additions & 4 deletions
@@ -2,15 +2,46 @@
 
 set -xu
 
-# Build the docker image.
-docker build -f docker/Dockerfile.tpu -t vllm-tpu .
 
-# Set up cleanup.
-remove_docker_container() { docker rm -f tpu-test || true; }
+remove_docker_container() {
+    docker rm -f tpu-test || true;
+    docker rm -f vllm-tpu || true;
+}
+
 trap remove_docker_container EXIT
+
 # Remove the container that might not be cleaned up in the previous run.
 remove_docker_container
 
+# Build the docker image.
+docker build -f docker/Dockerfile.tpu -t vllm-tpu .
+
+# Set up cleanup.
+cleanup_docker() {
+    # Get Docker's root directory
+    docker_root=$(docker info -f '{{.DockerRootDir}}')
+    if [ -z "$docker_root" ]; then
+        echo "Failed to determine Docker root directory."
+        exit 1
+    fi
+    echo "Docker root directory: $docker_root"
+    # Check disk usage of the filesystem where Docker's root directory is located
+    disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
+    # Define the threshold
+    threshold=70
+    if [ "$disk_usage" -gt "$threshold" ]; then
+        echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
+        # Remove dangling images (those that are not tagged and not used by any container)
+        docker image prune -f
+        # Remove unused volumes / force the system prune for old images as well.
+        docker volume prune -f && docker system prune --force --filter "until=72h" --all
+        echo "Docker images and volumes cleanup completed."
+    else
+        echo "Disk usage is below $threshold%. No cleanup needed."
+    fi
+}
+cleanup_docker
 
 # For HF_TOKEN.
 source /etc/environment
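Editorial note: the new `cleanup_docker` helper prunes Docker images and volumes only when the filesystem holding Docker's root directory crosses a usage threshold. For illustration only (this is not part of the commit, which implements the logic in bash above), the same threshold check could look like this in Python:

```python
import shutil
import subprocess

THRESHOLD = 70  # percent, mirroring `threshold=70` in the CI script


def cleanup_docker(docker_root: str = "/var/lib/docker") -> None:
    """Prune Docker images/volumes when disk usage exceeds THRESHOLD."""
    usage = shutil.disk_usage(docker_root)
    percent_used = usage.used * 100 // usage.total
    if percent_used > THRESHOLD:
        print(f"Disk usage is above {THRESHOLD}%. Cleaning up...")
        # Same prune commands the bash helper runs.
        subprocess.run(["docker", "image", "prune", "-f"], check=False)
        subprocess.run(["docker", "volume", "prune", "-f"], check=False)
        subprocess.run(
            ["docker", "system", "prune", "--force",
             "--filter", "until=72h", "--all"],
            check=False,
        )
    else:
        print(f"Disk usage is below {THRESHOLD}%. No cleanup needed.")
```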

.buildkite/test-pipeline.yaml

Lines changed: 4 additions & 1 deletion
@@ -202,8 +202,9 @@ steps:
   - tests/test_sequence
   - tests/test_config
   - tests/test_logger
+  - tests/test_vllm_port
   commands:
-  - pytest -v -s engine test_sequence.py test_config.py test_logger.py
+  - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
   # OOM in the CI unless we run this separately
   - pytest -v -s tokenization
   working_dir: "/vllm-workspace/tests" # optional

@@ -628,9 +629,11 @@ steps:
   - vllm/worker/model_runner.py
   - entrypoints/llm/test_collective_rpc.py
   - tests/v1/test_async_llm_dp.py
+  - tests/v1/entrypoints/openai/test_multi_api_servers.py
   - vllm/v1/engine/
   commands:
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
+  - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
   - pytest -v -s entrypoints/llm/test_collective_rpc.py
   - pytest -v -s ./compile/test_basic_correctness.py
   - pytest -v -s ./compile/test_wrapper.py

SECURITY.md

Lines changed: 2 additions & 0 deletions
@@ -8,4 +8,6 @@ Please report security issues privately using [the vulnerability submission form
 
 ---
 
+Please see the [Security Guide in the vLLM documentation](https://docs.vllm.ai/en/latest/usage/security.html) for more information on vLLM's security assumptions and recommendations.
+
 Please see [PyTorch's Security Policy](https://github.com/pytorch/pytorch/blob/main/SECURITY.md) for more information and recommendations on how to securely interact with models.

benchmarks/README.md

Lines changed: 54 additions & 6 deletions
@@ -64,6 +64,12 @@ become available.
     <td style="text-align: center;">✅</td>
     <td><code>lmms-lab/LLaVA-OneVision-Data</code>, <code>Aeala/ShareGPT_Vicuna_unfiltered</code></td>
   </tr>
+  <tr>
+    <td><strong>Custom</strong></td>
+    <td style="text-align: center;">✅</td>
+    <td style="text-align: center;">✅</td>
+    <td>Local file: <code>data.jsonl</code></td>
+  </tr>
   </tbody>
 </table>

@@ -124,6 +130,38 @@ P99 ITL (ms): 8.39
 ==================================================
 ```
 
+### Custom Dataset
+
+If the dataset you want to benchmark is not yet supported in vLLM, you can still benchmark it using `CustomDataset`. Your data needs to be in `.jsonl` format, with a "prompt" field in each entry, e.g., `data.jsonl`:
+
+```
+{"prompt": "What is the capital of India?"}
+{"prompt": "What is the capital of Iran?"}
+{"prompt": "What is the capital of China?"}
+```
+
+```bash
+# start server
+VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.1-8B-Instruct --disable-log-requests
+```
+
+```bash
+# run benchmarking script
+python3 benchmarks/benchmark_serving.py --port 9001 --save-result --save-detailed \
+    --backend vllm \
+    --model meta-llama/Llama-3.1-8B-Instruct \
+    --endpoint /v1/completions \
+    --dataset-name custom \
+    --dataset-path <path-to-your-data-jsonl> \
+    --custom-skip-chat-template \
+    --num-prompts 80 \
+    --max-concurrency 1 \
+    --temperature=0.3 \
+    --top-p=0.75 \
+    --result-dir "./log/"
+```
+
+You can skip applying the chat template if your data already includes it by passing `--custom-skip-chat-template`.
+
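Editorial note: a compatible `data.jsonl` can be produced with a few lines of Python; this sketch is illustrative and not part of the commit, and the prompts are just the examples from the README snippet above.

```python
import json

# Example prompts, copied from the README snippet above.
prompts = [
    "What is the capital of India?",
    "What is the capital of Iran?",
    "What is the capital of China?",
]

# CustomDataset expects one JSON object per line, each with a "prompt" field.
with open("data.jsonl", "w", encoding="utf-8") as f:
    for p in prompts:
        f.write(json.dumps({"prompt": p}) + "\n")
```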
 ### VisionArena Benchmark for Vision Language Models
 
 ```bash

@@ -146,9 +184,9 @@ python3 vllm/benchmarks/benchmark_serving.py \
 
 ``` bash
 VLLM_USE_V1=1 vllm serve meta-llama/Meta-Llama-3-8B-Instruct \
-    --ngram_prompt_lookup_min 2 \
-    --ngram-prompt-lookup-max 5 \
-    --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5}'
+    --speculative-config $'{"method": "ngram",
+    "num_speculative_tokens": 5, "prompt_lookup_max": 5,
+    "prompt_lookup_min": 2}'
 ```
 
 ``` bash
@@ -203,6 +241,16 @@ python3 vllm/benchmarks/benchmark_serving.py \
     --seed 42
 ```
 
+**`philschmid/mt-bench`**
+
+``` bash
+python3 vllm/benchmarks/benchmark_serving.py \
+    --model Qwen/QwQ-32B \
+    --dataset-name hf \
+    --dataset-path philschmid/mt-bench \
+    --num-prompts 80
+```
+
 ### Running With Sampling Parameters
 
 When using OpenAI-compatible backends such as `vllm`, optional sampling
@@ -273,9 +321,9 @@ python3 vllm/benchmarks/benchmark_throughput.py \
     --output-len=100 \
     --num-prompts=2048 \
     --async-engine \
-    --ngram_prompt_lookup_min=2 \
-    --ngram-prompt-lookup-max=5 \
-    --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5}'
+    --speculative-config $'{"method": "ngram",
+    "num_speculative_tokens": 5, "prompt_lookup_max": 5,
+    "prompt_lookup_min": 2}'
 ```
 
 ```

benchmarks/benchmark_dataset.py

Lines changed: 91 additions & 3 deletions
@@ -9,9 +9,6 @@
 - BurstGPT
 - HuggingFace
 - VisionArena
-
-TODO: Implement CustomDataset to parse a JSON file and convert its contents into
-SampleRequest instances, similar to the approach used in ShareGPT.
 """
 
 import base64
@@ -442,6 +439,97 @@ def sample(
         return samples
 
 
+# -----------------------------------------------------------------------------
+# Custom Dataset Implementation
+# -----------------------------------------------------------------------------
+
+
+class CustomDataset(BenchmarkDataset):
+    """
+    Implements the Custom dataset. Loads data from a JSONL file and generates
+    sample requests, one per "prompt" entry. E.g.,
+    ```
+    {"prompt": "What is the capital of India?"}
+    {"prompt": "What is the capital of Iran?"}
+    {"prompt": "What is the capital of China?"}
+    ```
+    """
+
+    def __init__(self, **kwargs) -> None:
+        super().__init__(**kwargs)
+        self.load_data()
+
+    def load_data(self) -> None:
+        if self.dataset_path is None:
+            raise ValueError("dataset_path must be provided for loading data.")
+
+        # self.data will be a list of dictionaries,
+        # e.g., [{"prompt": "What is the capital of India?"}, ...].
+        # This is the standardized format that load_data() must produce,
+        # whatever the filetype of dataset_path; sample() assumes it.
+        self.data = []
+
+        # Load the JSONL file
+        if self.dataset_path.endswith(".jsonl"):
+            jsonl_data = pd.read_json(path_or_buf=self.dataset_path, lines=True)
+
+            # check if the JSONL file has a 'prompt' column
+            if "prompt" not in jsonl_data.columns:
+                raise ValueError("JSONL file must contain a 'prompt' column.")
+
+            # Convert each DataFrame row to a dictionary and append it to
+            # self.data, producing the standardized list-of-dicts format.
+            for _, row in jsonl_data.iterrows():
+                self.data.append(row.to_dict())
+        else:
+            raise NotImplementedError(
+                "Only JSONL format is supported for CustomDataset."
+            )
+
+        random.seed(self.random_seed)
+        random.shuffle(self.data)
+
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        lora_path: Optional[str] = None,
+        max_loras: Optional[int] = None,
+        output_len: Optional[int] = None,
+        enable_multimodal_chat: bool = False,
+        skip_chat_template: bool = False,
+        **kwargs,
+    ) -> list:
+        sampled_requests = []
+        for item in self.data:
+            if len(sampled_requests) >= num_requests:
+                break
+            prompt = item["prompt"]
+
+            # Apply the chat template unless the caller opted out.
+            if not skip_chat_template:
+                prompt = tokenizer.apply_chat_template(
+                    [{"role": "user", "content": prompt}],
+                    add_generation_prompt=True,
+                    tokenize=False,
+                )
+
+            prompt_len = len(tokenizer(prompt).input_ids)
+            sampled_requests.append(
+                SampleRequest(
+                    prompt=prompt,
+                    prompt_len=prompt_len,
+                    expected_output_len=output_len,
+                )
+            )
+        self.maybe_oversample_requests(sampled_requests, num_requests)
+
+        return sampled_requests
+
+
 # -----------------------------------------------------------------------------
 # Sonnet Dataset Implementation
 # -----------------------------------------------------------------------------
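Editorial note: a quick way to sanity-check the new class outside the serving script. This is a minimal sketch, assuming you run it from the repo's `benchmarks/` directory (so the module imports directly), that `data.jsonl` from the README exists, and that the base class supplies a default `random_seed`; the model name is only an example.

```python
from benchmark_dataset import CustomDataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")

# load_data() runs in __init__ and shuffles entries with the default seed.
dataset = CustomDataset(dataset_path="data.jsonl")
requests = dataset.sample(
    tokenizer=tokenizer,
    num_requests=2,
    output_len=256,
    skip_chat_template=False,  # apply the model's chat template
)
for req in requests:
    print(req.prompt_len, req.expected_output_len)
```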

benchmarks/benchmark_serving.py

Lines changed: 28 additions & 2 deletions
@@ -60,6 +60,7 @@
     ASRDataset,
     BurstGPTDataset,
     ConversationDataset,
+    CustomDataset,
     HuggingFaceDataset,
     InstructCoderDataset,
     MTBenchDataset,

@@ -627,7 +628,16 @@ def main(args: argparse.Namespace):
             "'--dataset-path' if required."
         )
 
-    if args.dataset_name == "sonnet":
+    if args.dataset_name == "custom":
+        dataset = CustomDataset(dataset_path=args.dataset_path)
+        input_requests = dataset.sample(
+            num_requests=args.num_prompts,
+            tokenizer=tokenizer,
+            output_len=args.custom_output_len,
+            skip_chat_template=args.custom_skip_chat_template,
+        )
+
+    elif args.dataset_name == "sonnet":
         dataset = SonnetDataset(dataset_path=args.dataset_path)
         # For the "sonnet" dataset, formatting depends on the backend.
         if args.backend == "openai-chat":

@@ -838,6 +848,8 @@ def main(args: argparse.Namespace):
         ]:
             if field in result_json:
                 del result_json[field]
+            if field in benchmark_result:
+                del benchmark_result[field]
 
         # Save to file
         base_model_id = model_id.split("/")[-1]

@@ -850,6 +862,7 @@ def main(args: argparse.Namespace):
         if args.result_filename:
             file_name = args.result_filename
         if args.result_dir:
+            os.makedirs(args.result_dir, exist_ok=True)
             file_name = os.path.join(args.result_dir, file_name)
         with open(
             file_name, mode="a+" if args.append_result else "w", encoding="utf-8"

@@ -890,7 +903,7 @@
         "--dataset-name",
         type=str,
         default="sharegpt",
-        choices=["sharegpt", "burstgpt", "sonnet", "random", "hf"],
+        choices=["sharegpt", "burstgpt", "sonnet", "random", "hf", "custom"],
         help="Name of the dataset to benchmark on.",
     )
     parser.add_argument(

@@ -1060,6 +1073,19 @@
     )
 
     # group for dataset specific arguments
+    custom_group = parser.add_argument_group("custom dataset options")
+    custom_group.add_argument(
+        "--custom-output-len",
+        type=int,
+        default=256,
+        help="Number of output tokens per request, used only for custom dataset.",
+    )
+    custom_group.add_argument(
+        "--custom-skip-chat-template",
+        action="store_true",
+        help="Skip applying chat template to prompt, used only for custom dataset.",
+    )
+
     sonnet_group = parser.add_argument_group("sonnet dataset options")
     sonnet_group.add_argument(
         "--sonnet-input-len",
