Skip to content

Commit 57cf8f9

Browse files
committed
Merge branch 'sync-with-0.7.2' into release
2 parents cdaa1c7 + 6ec0863 commit 57cf8f9

File tree

1,096 files changed

+10632
-3854
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,096 files changed

+10632
-3854
lines changed

.buildkite/check-wheel-size.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
13
import os
24
import sys
35
import zipfile
46

5-
# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 300 MiB
7+
# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
68
# Note that we have 400 MiB quota, please use it wisely.
79
# See https://github.com/pypi/support/issues/3792 .
810
# Please also sync the value with the one in Dockerfile.
9-
VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 300))
11+
VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
1012

1113

1214
def print_top_10_largest_files(zip_file):

.buildkite/generate_index.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
13
import argparse
24
import os
35

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/SparseLlama-3.1-8B-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM -b "auto" -t 2
2+
model_name: "nm-testing/SparseLlama-3.1-8B-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.6353
8+
- name: "exact_match,flexible-extract"
9+
value: 0.637
10+
limit: null
11+
num_fewshot: null

.buildkite/lm-eval-harness/test_lm_eval_correctness.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: Apache-2.0
12
"""
23
LM eval harness on model to compare vs HF baseline computed offline.
34
Configs are found in configs/$MODEL.yaml

.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
13
import json
24
import os
35
from pathlib import Path

.buildkite/nightly-benchmarks/scripts/download-tokenizer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
13
import argparse
24

35
from transformers import AutoTokenizer

.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
13
import argparse
24
import json
35
from pathlib import Path

.buildkite/nightly-benchmarks/scripts/get-lmdeploy-modelname.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
13
from lmdeploy.serve.openai.api_client import APIClient
24

35
api_client = APIClient("http://localhost:8000")

.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
13
import datetime
24
import json
35
import os

.buildkite/run-gh200-test.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,6 @@ trap remove_docker_container EXIT
2323
remove_docker_container
2424

2525
# Run the image and test offline inference
26-
docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
27-
python3 examples/offline_inference/basic.py
26+
docker run -e HF_TOKEN -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
27+
python3 examples/offline_inference/cli.py --model meta-llama/Llama-3.2-1B
2828
'

0 commit comments

Comments
 (0)