Skip to content

Commit 9b6c2be

Browse files
authored
Merge branch 'tg/update_vllm' into gm/validation
2 parents ad0b325 + ed6e907 commit 9b6c2be

File tree

1,921 files changed

+310497
-50645
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,921 files changed

+310497
-50645
lines changed

.buildkite/check-wheel-size.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,48 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
13
import os
4+
import sys
25
import zipfile
36

4-
MAX_SIZE_MB = 200
7+
# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
8+
# Note that we have a 400 MiB quota; please use it wisely.
9+
# See https://github.com/pypi/support/issues/3792 .
10+
# Please also sync the value with the one in Dockerfile.
11+
VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
512

613

714
def print_top_10_largest_files(zip_file):
15+
"""Print the top 10 largest files in the given zip file."""
816
with zipfile.ZipFile(zip_file, 'r') as z:
917
file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()]
1018
file_sizes.sort(key=lambda x: x[1], reverse=True)
1119
for f, size in file_sizes[:10]:
12-
print(f"{f}: {size/(1024*1024)} MBs uncompressed.")
20+
print(f"{f}: {size / (1024 * 1024):.2f} MBs uncompressed.")
1321

1422

1523
def check_wheel_size(directory):
24+
"""Check the size of .whl files in the given directory."""
1625
for root, _, files in os.walk(directory):
17-
for f in files:
18-
if f.endswith(".whl"):
19-
wheel_path = os.path.join(root, f)
20-
wheel_size = os.path.getsize(wheel_path)
21-
wheel_size_mb = wheel_size / (1024 * 1024)
22-
if wheel_size_mb > MAX_SIZE_MB:
23-
print(
24-
f"Wheel {wheel_path} is too large ({wheel_size_mb} MB) "
25-
f"compare to the allowed size ({MAX_SIZE_MB} MB).")
26+
for file_name in files:
27+
if file_name.endswith(".whl"):
28+
wheel_path = os.path.join(root, file_name)
29+
wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
30+
if wheel_size_mb > VLLM_MAX_SIZE_MB:
31+
print(f"Not allowed: Wheel {wheel_path} is larger "
32+
f"({wheel_size_mb:.2f} MB) than the limit "
33+
f"({VLLM_MAX_SIZE_MB} MB).")
2634
print_top_10_largest_files(wheel_path)
2735
return 1
2836
else:
2937
print(f"Wheel {wheel_path} is within the allowed size "
30-
f"({wheel_size_mb} MB).")
38+
f"({wheel_size_mb:.2f} MB).")
3139
return 0
3240

3341

3442
if __name__ == "__main__":
35-
import sys
36-
sys.exit(check_wheel_size(sys.argv[1]))
43+
if len(sys.argv) < 2:
44+
print("Usage: python check-wheel-size.py <directory>")
45+
sys.exit(1)
46+
47+
directory = sys.argv[1]
48+
sys.exit(check_wheel_size(directory))

.buildkite/download-images.sh

Lines changed: 0 additions & 18 deletions
This file was deleted.

.buildkite/generate_index.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
import argparse
4+
import os
5+
6+
template = """<!DOCTYPE html>
7+
<html>
8+
<body>
9+
<h1>Links for vLLM</h1/>
10+
<a href="../{wheel_html_escaped}">{wheel}</a><br/>
11+
</body>
12+
</html>
13+
"""
14+
15+
parser = argparse.ArgumentParser()
16+
parser.add_argument("--wheel", help="The wheel path.", required=True)
17+
args = parser.parse_args()
18+
19+
filename = os.path.basename(args.wheel)
20+
21+
with open("index.html", "w") as f:
22+
print(f"Generated index.html for {args.wheel}")
23+
# cloudfront requires escaping the '+' character
24+
f.write(
25+
template.format(wheel=filename,
26+
wheel_html_escaped=filename.replace("+", "%2B")))
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m deepseek-ai/DeepSeek-V2-Lite-Chat -b "auto" -l 1000 -f 5 -t 2
2+
model_name: "deepseek-ai/DeepSeek-V2-Lite-Chat"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.671
8+
- name: "exact_match,flexible-extract"
9+
value: 0.664
10+
limit: 1000
11+
num_fewshot: 5
12+
trust_remote_code: True
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5
2+
model_name: "nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.905
8+
- name: "exact_match,flexible-extract"
9+
value: 0.905
10+
limit: 1000
11+
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors -b auto -l 1000 -f 5 -t 1
2+
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.752
8+
- name: "exact_match,flexible-extract"
9+
value: 0.754
10+
limit: 1000
11+
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5 -t 1
2+
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.753
8+
- name: "exact_match,flexible-extract"
9+
value: 0.753
10+
limit: 1000
11+
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test -b 32 -l 1000 -f 5 -t 1
2+
model_name: "nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.755
8+
- name: "exact_match,flexible-extract"
9+
value: 0.755
10+
limit: 1000
11+
num_fewshot: 5
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m neuralmagic/Meta-Llama-3-8B-Instruct-FP8 -b 32 -l 250 -f 5 -t 1
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Meta-Llama-3-8B-Instruct-FP8 -b 32 -l 250 -f 5 -t 1
22
model_name: "neuralmagic/Meta-Llama-3-8B-Instruct-FP8"
33
tasks:
44
- name: "gsm8k"
55
metrics:
66
- name: "exact_match,strict-match"
7-
value: 0.756
7+
value: 0.753
88
- name: "exact_match,flexible-extract"
9-
value: 0.752
10-
limit: 250
9+
value: 0.753
10+
limit: 1000
1111
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Asym-Per-Token-Test -b "auto" -l 250 -f 5 -t 1
2+
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Asym-Per-Token-Test"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.764
8+
- name: "exact_match,flexible-extract"
9+
value: 0.764
10+
limit: 250
11+
num_fewshot: 5

0 commit comments

Comments
 (0)