Commit cd08720

Merge remote-tracking branch 'origin/main' into dmoss/flashinfer-cutlass-mxfp4-fused-moe

2 parents: 6bdce85 + 8a19303

308 files changed: +14052 additions, -9956 deletions

.buildkite/generate_index.py

Lines changed: 21 additions & 2 deletions
@@ -8,7 +8,8 @@
 <html>
 <body>
 <h1>Links for vLLM</h1/>
-<a href="../{wheel_html_escaped}">{wheel}</a><br/>
+<a href="../{x86_wheel_html_escaped}">{x86_wheel}</a><br/>
+<a href="../{arm_wheel_html_escaped}">{arm_wheel}</a><br/>
 </body>
 </html>
 """
@@ -21,7 +22,25 @@
 
 with open("index.html", "w") as f:
     print(f"Generated index.html for {args.wheel}")
+    # sync the abi tag with .buildkite/scripts/upload-wheels.sh
+    if "x86_64" in filename:
+        x86_wheel = filename
+        arm_wheel = filename.replace("x86_64", "aarch64").replace(
+            "manylinux1", "manylinux2014"
+        )
+    elif "aarch64" in filename:
+        x86_wheel = filename.replace("aarch64", "x86_64").replace(
+            "manylinux2014", "manylinux1"
+        )
+        arm_wheel = filename
+    else:
+        raise ValueError(f"Unsupported wheel: {filename}")
     # cloudfront requires escaping the '+' character
     f.write(
-        template.format(wheel=filename, wheel_html_escaped=filename.replace("+", "%2B"))
+        template.format(
+            x86_wheel=x86_wheel,
+            x86_wheel_html_escaped=x86_wheel.replace("+", "%2B"),
+            arm_wheel=arm_wheel,
+            arm_wheel_html_escaped=arm_wheel.replace("+", "%2B"),
+        )
     )
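
The index generator now derives both platforms' wheel names from whichever wheel the build produced, and its abi-tag mapping must stay in sync with .buildkite/scripts/upload-wheels.sh. A minimal standalone sketch of that mapping; the helper name sibling_wheel_names and the sample filename are hypothetical, not part of the commit:

# Hedged sketch of the x86_64 <-> aarch64 wheel-name mapping added above.
# `sibling_wheel_names` is a hypothetical helper, not code from this commit.
def sibling_wheel_names(filename: str) -> tuple[str, str]:
    """Return (x86_wheel, arm_wheel) given either platform's wheel name."""
    if "x86_64" in filename:
        # x86 wheels carry the manylinux1 tag, ARM wheels manylinux2014
        arm = filename.replace("x86_64", "aarch64").replace("manylinux1", "manylinux2014")
        return filename, arm
    if "aarch64" in filename:
        x86 = filename.replace("aarch64", "x86_64").replace("manylinux2014", "manylinux1")
        return x86, filename
    raise ValueError(f"Unsupported wheel: {filename}")

# Example with a made-up wheel name:
x86, arm = sibling_wheel_names("vllm-0.10.0-cp38-abi3-manylinux1_x86_64.whl")
assert arm == "vllm-0.10.0-cp38-abi3-manylinux2014_aarch64.whl"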

.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml

Lines changed: 0 additions & 12 deletions
This file was deleted.

.buildkite/lm-eval-harness/configs/models-large.txt

Lines changed: 0 additions & 1 deletion
@@ -3,4 +3,3 @@ Meta-Llama-3-70B-Instruct.yaml
 Mixtral-8x7B-Instruct-v0.1.yaml
 Qwen2-57B-A14-Instruct.yaml
 DeepSeek-V2-Lite-Chat.yaml
-Meta-Llama-3-8B-QQQ.yaml

.buildkite/nightly-benchmarks/scripts/compare-json-results.py

Lines changed: 119 additions & 27 deletions
@@ -3,44 +3,129 @@
 import argparse
 import json
 import os
+from importlib import util
 
 import pandas as pd
 
+plotly_found = util.find_spec("plotly.express") is not None
+
 
 def compare_data_columns(
     files, name_column, data_column, info_cols, drop_column, debug=False
 ):
-    print("\ncompare_data_column: " + data_column)
+    """
+    Align concatenation by keys derived from info_cols instead of row order.
+    - Pick one canonical key list: subset of info_cols present in ALL files.
+    - For each file: set index to those keys, aggregate duplicates
+    - (mean for metric, first for names).
+    - Concat along axis=1 (indexes align), then reset_index so callers can
+    - group by columns.
+    - If --debug, add a <file_label>_name column per file.
+    """
+    print("\ncompare_data_column:", data_column)
+
     frames = []
     raw_data_cols = []
     compare_frames = []
+
+    # 1) choose a canonical key list from info_cols that exists in ALL files
+    cols_per_file = []
+    for f in files:
+        try:
+            df_tmp = pd.read_json(f, orient="records")
+        except Exception as err:
+            raise ValueError(f"Failed to read {f}") from err
+        cols_per_file.append(set(df_tmp.columns))
+
+    key_cols = [c for c in info_cols if all(c in cset for cset in cols_per_file)]
+    if not key_cols:
+        # soft fallback: use any info_cols present in the first file
+        key_cols = [c for c in info_cols if c in list(cols_per_file[0])]
+    if not key_cols:
+        raise ValueError(
+            "No common key columns found from info_cols across the input files."
+        )
+
+    # 2) build a single "meta" block (keys as columns) once, aligned by the key index
+    meta_added = False
+
     for file in files:
-        data_df = pd.read_json(file)
-        serving_df = data_df.dropna(subset=[drop_column], ignore_index=True)
-        # Show all info columns in the first couple columns
-        if not frames:
-            for col in info_cols:
-                if col not in serving_df.columns:
-                    print(f"Skipping missing column: {col}")
-                    continue
-                frames.append(serving_df[col])
-        # only show test name under debug mode
-        if debug is True:
-            serving_df = serving_df.rename(columns={name_column: file + "_name"})
-            frames.append(serving_df[file + "_name"])
-
-        file = "/".join(file.split("/")[:-1])
-        serving_df = serving_df.rename(columns={data_column: file})
-        frames.append(serving_df[file])
-        raw_data_cols.append(file)
-        compare_frames.append(serving_df[file])
+        df = pd.read_json(file, orient="records")
+
+        # Keep rows that actually have the compared metric (same as original behavior)
+        if drop_column in df.columns:
+            df = df.dropna(subset=[drop_column], ignore_index=True)
+
+        # Stabilize numeric key columns (harmless if missing)
+        for c in (
+            "Input Len",
+            "Output Len",
+            "TP Size",
+            "PP Size",
+            "# of max concurrency.",
+            "qps",
+        ):
+            if c in df.columns:
+                df[c] = pd.to_numeric(df[c], errors="coerce")
+
+        # Ensure all key columns exist
+        for c in key_cols:
+            if c not in df.columns:
+                df[c] = pd.NA
+
+        # Set index = key_cols and aggregate duplicates → unique MultiIndex
+        df_idx = df.set_index(key_cols, drop=False)
+
+        # meta (key columns), unique per key
+        meta = df_idx[key_cols]
+        if not meta.index.is_unique:
+            meta = meta.groupby(level=key_cols, dropna=False).first()
+
+        # metric series for this file, aggregated to one row per key
+        file_label = "/".join(file.split("/")[:-1]) or os.path.basename(file)
+        s = df_idx[data_column]
+        if not s.index.is_unique:
+            s = s.groupby(level=key_cols, dropna=False).mean()
+        s.name = file_label  # column label like original
+
+        # add meta once (from first file) so keys are the leftmost columns
+        if not meta_added:
+            frames.append(meta)
+            meta_added = True
+
+        # (NEW) debug: aligned test-name column per file
+        if debug and name_column in df_idx.columns:
+            name_s = df_idx[name_column]
+            if not name_s.index.is_unique:
+                name_s = name_s.groupby(level=key_cols, dropna=False).first()
+            name_s.name = f"{file_label}_name"
+            frames.append(name_s)
+
+        frames.append(s)
+        raw_data_cols.append(file_label)
+        compare_frames.append(s)
+
+    # Generalize ratio: for any file N>=2, add ratio (fileN / file1)
     if len(compare_frames) >= 2:
-        # Compare numbers among two files
-        ratio_df = compare_frames[1] / compare_frames[0]
-        frames.append(ratio_df)
-        compare_frames.pop(1)
+        base = compare_frames[0]
+        current = compare_frames[-1]
+        ratio = current / base
+        ratio = ratio.mask(base == 0)  # avoid inf when baseline is 0
+        ratio.name = f"Ratio 1 vs {len(compare_frames)}"
+        frames.append(ratio)
 
+    # 4) concat on columns with aligned MultiIndex;
+    #    then reset_index to return keys as columns
     concat_df = pd.concat(frames, axis=1)
+    concat_df = concat_df.reset_index(drop=True).reset_index()
+    if "index" in concat_df.columns:
+        concat_df = concat_df.drop(columns=["index"])
+
+    # Ensure key/info columns appear first (in your info_cols order)
+    front = [c for c in info_cols if c in concat_df.columns]
+    rest = [c for c in concat_df.columns if c not in front]
+    concat_df = concat_df[front + rest]
+
     print(raw_data_cols)
     return concat_df, raw_data_cols
 
@@ -67,6 +152,15 @@ def split_json_by_tp_pp(
 
     df = pd.DataFrame(data)
 
+    # Keep only "serving" tests
+    name_col = next(
+        (c for c in ["Test name", "test_name", "Test Name"] if c in df.columns), None
+    )
+    if name_col:
+        df = df[
+            df[name_col].astype(str).str.contains(r"serving", case=False, na=False)
+        ].copy()
+
     # Handle alias column names
     rename_map = {
         "tp_size": "TP Size",
@@ -181,16 +275,14 @@ def split_json_by_tp_pp(
             f"Expected subset: {filtered_info_cols}, "
            f"but DataFrame has: {list(output_df.columns)}"
         )
-
     output_df_sorted = output_df.sort_values(by=existing_group_cols)
     output_groups = output_df_sorted.groupby(existing_group_cols, dropna=False)
     for name, group in output_groups:
         html = group.to_html()
         text_file.write(html_msgs_for_data_cols[i])
         text_file.write(html)
 
-        if plot is True:
-            import pandas as pd
+        if plot and plotly_found:
             import plotly.express as px
 
             df = group[raw_data_cols]
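
The core of this rewrite replaces positional concatenation with key-aligned concatenation: each file's metric becomes a Series indexed by the shared key columns, duplicate keys collapse via groupby, and pd.concat(axis=1) aligns rows by key rather than by row order. A self-contained sketch of the same pandas pattern, using toy column names rather than the real benchmark schema:

# Self-contained sketch of the key-aligned concat pattern; toy data,
# not the benchmark schema from this commit.
import pandas as pd

key_cols = ["qps", "TP Size"]
a = pd.DataFrame({"qps": [1, 1, 4], "TP Size": [2, 2, 2], "tput": [10.0, 12.0, 30.0]})
b = pd.DataFrame({"qps": [4, 1], "TP Size": [2, 2], "tput": [33.0, 11.0]})

frames = []
for label, df in [("run_a", a), ("run_b", b)]:
    s = df.set_index(key_cols)["tput"]
    if not s.index.is_unique:
        # duplicate keys collapse to their mean, one row per key
        s = s.groupby(level=key_cols, dropna=False).mean()
    s.name = label
    frames.append(s)

# rows align by the (qps, TP Size) key, regardless of each file's row order
merged = pd.concat(frames, axis=1).reset_index()
# ratio of last file to first, masked where the baseline is 0 to avoid inf
merged["ratio"] = (merged["run_b"] / merged["run_a"]).mask(merged["run_a"] == 0)
print(merged)  # run_a's duplicate qps=1 rows average to 11.0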

.buildkite/release-pipeline.yaml

Lines changed: 6 additions & 1 deletion
@@ -27,7 +27,12 @@ steps:
     env:
       DOCKER_BUILDKIT: "1"
 
+  - block: "Build CUDA 12.6 wheel"
+    key: block-build-cu126-wheel
+    depends_on: ~
+
   - label: "Build wheel - CUDA 12.6"
+    depends_on: block-build-cu126-wheel
     id: build-wheel-cuda-12-6
     agents:
       queue: cpu_queue_postmerge
@@ -68,7 +73,7 @@ steps:
       queue: cpu_queue_postmerge
     commands:
       - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ."
       - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
 
   - label: "Annotate release workflow"

.buildkite/scripts/hardware_ci/run-cpu-test.sh

Lines changed: 6 additions & 1 deletion
@@ -46,6 +46,11 @@ function cpu_tests() {
     set -e
     python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
 
+  # Run kernel tests
+  docker exec cpu-test-"$NUMA_NODE" bash -c "
+    set -e
+    pytest -v -s tests/kernels/test_onednn.py"
+
   # Run basic model test
   docker exec cpu-test-"$NUMA_NODE" bash -c "
     set -e
@@ -99,4 +104,4 @@ function cpu_tests() {
 
 # All of CPU tests are expected to be finished less than 40 mins.
 export -f cpu_tests
-timeout 1.5h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"
+timeout 2h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"

.buildkite/scripts/tpu/cleanup_docker.sh

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ if [ "$disk_usage" -gt "$threshold" ]; then
     # Remove dangling images (those that are not tagged and not used by any container)
     docker image prune -f
     # Remove unused volumes / force the system prune for old images as well.
-    docker volume prune -f && docker system prune --force --filter "until=72h" --all
+    docker volume prune -f && docker system prune --force --filter "until=24h" --all
     echo "Docker images and volumes cleanup completed."
 else
     echo "Disk usage is below $threshold%. No cleanup needed."

.buildkite/scripts/upload-wheels.sh

Lines changed: 13 additions & 2 deletions
@@ -14,8 +14,19 @@ fi
 # Get the single wheel file
 wheel="${wheel_files[0]}"
 
-# Rename 'linux' to 'manylinux1' in the wheel filename
-new_wheel="${wheel/linux/manylinux1}"
+# Detect architecture and rename 'linux' to appropriate manylinux version
+arch=$(uname -m)
+if [[ $arch == "x86_64" ]]; then
+    manylinux_version="manylinux1"
+elif [[ $arch == "aarch64" ]]; then
+    manylinux_version="manylinux2014"
+else
+    echo "Warning: Unknown architecture $arch, using manylinux1 as default"
+    manylinux_version="manylinux1"
+fi
+
+# Rename 'linux' to the appropriate manylinux version in the wheel filename
+new_wheel="${wheel/linux/$manylinux_version}"
 mv -- "$wheel" "$new_wheel"
 wheel="$new_wheel"
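
Note that upload-wheels.sh and generate_index.py must now agree on the arch-to-manylinux pairing (hence the "sync the abi tag" comment in the Python diff above). A hedged sketch of that shared mapping, with the bash rename ${wheel/linux/$manylinux_version} expressed in Python; the table, function, and filename below are illustrative, not part of the commit:

# Hedged sketch: the arch -> manylinux tag table both scripts assume,
# plus a Python mirror of bash's `${wheel/linux/$manylinux_version}`.
# ARCH_TO_MANYLINUX and rename_wheel are illustrative, not commit code.
import platform

ARCH_TO_MANYLINUX = {
    "x86_64": "manylinux1",
    "aarch64": "manylinux2014",
}

def rename_wheel(wheel: str, arch: str = "") -> str:
    """Replace the first 'linux' in the filename with the arch's manylinux tag."""
    arch = arch or platform.machine()
    # the shell script warns and falls back to manylinux1 on unknown arches
    tag = ARCH_TO_MANYLINUX.get(arch, "manylinux1")
    return wheel.replace("linux", tag, 1)

# Example with a made-up wheel name:
print(rename_wheel("vllm-0.10.0-cp38-abi3-linux_aarch64.whl", arch="aarch64"))
# -> vllm-0.10.0-cp38-abi3-manylinux2014_aarch64.whl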
