red-hat-data-services
diff --git a/‎.buildkite/check-wheel-size.py‎
Lines changed: 7 additions & 2 deletions b/‎.buildkite/check-wheel-size.py‎
Lines changed: 7 additions & 2 deletions
diff --git a/‎.buildkite/generate_index.py‎
Lines changed: 2 additions & 0 deletions b/‎.buildkite/generate_index.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.buildkite/lm-eval-harness/configs/SparseLlama3.1_2of4_fp8_compressed.yaml‎
Lines changed: 11 additions & 0 deletions b/‎.buildkite/lm-eval-harness/configs/SparseLlama3.1_2of4_fp8_compressed.yaml‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎.buildkite/lm-eval-harness/test_lm_eval_correctness.py‎
Lines changed: 1 addition & 0 deletions b/‎.buildkite/lm-eval-harness/test_lm_eval_correctness.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.buildkite/nightly-benchmarks/benchmark-pipeline.yaml‎
Lines changed: 4 additions & 3 deletions b/‎.buildkite/nightly-benchmarks/benchmark-pipeline.yaml‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py‎
Lines changed: 2 additions & 0 deletions b/‎.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.buildkite/nightly-benchmarks/scripts/download-tokenizer.py‎
Lines changed: 2 additions & 0 deletions b/‎.buildkite/nightly-benchmarks/scripts/download-tokenizer.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py‎
Lines changed: 2 additions & 0 deletions b/‎.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.buildkite/nightly-benchmarks/scripts/get-lmdeploy-modelname.py‎
Lines changed: 2 additions & 0 deletions b/‎.buildkite/nightly-benchmarks/scripts/get-lmdeploy-modelname.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh‎
Lines changed: 1 addition & 1 deletion
@@ -1,9 +1,14 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import os
 import sys
 import zipfile
 
-# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 250 MB
-VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 250))
+# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
+# Note that we have 400 MiB quota, please use it wisely.
+# See https://github.com/pypi/support/issues/3792 .
+# Please also sync the value with the one in Dockerfile.
+VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
 
 
 def print_top_10_largest_files(zip_file):
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 import os
 
 
@@ -0,0 +1,11 @@
+# bash ./run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/SparseLlama-3.1-8B-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM -b "auto" -t 2
+model_name: "nm-testing/SparseLlama-3.1-8B-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.6353
+  - name: "exact_match,flexible-extract"
+    value: 0.637
+limit: null
+num_fewshot: null 
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """
 LM eval harness on model to compare vs HF baseline computed offline.
 Configs are found in configs/$MODEL.yaml
 
@@ -1,5 +1,6 @@
 steps:
   - label: "Wait for container to be ready"
+    key: wait-for-container-image
     agents:
       queue: A100
     plugins:
@@ -10,12 +11,11 @@ steps:
             command:
             - sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
 
-  - wait
-
   - label: "A100"
     # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
     agents:
       queue: A100
+    depends_on: wait-for-container-image
     plugins:
     - kubernetes:
         podSpec:
@@ -49,6 +49,7 @@ steps:
     # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
     agents:
       queue: H200
+    depends_on: wait-for-container-image
     plugins:
     - docker#v5.12.0:
         image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
@@ -73,7 +74,7 @@ steps:
     # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
     agents:
       queue: H100
-    depends_on: block-h100
+    depends_on: wait-for-container-image
     plugins:
     - docker#v5.12.0:
         image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import json
 import os
 from pathlib import Path
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 
 from transformers import AutoTokenizer
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 import json
 from pathlib import Path
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 from lmdeploy.serve.openai.api_client import APIClient
 
 api_client = APIClient("http://localhost:8000")
 
@@ -43,7 +43,7 @@ main() {
 
 
 
-    # The figures should be genereated by a separate process outside the CI/CD pipeline
+    # The figures should be generated by a separate process outside the CI/CD pipeline
 
     # # generate figures
     # python3 -m pip install tabulate pandas matplotlib
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
	`2`	`+`
`1`	`3`	`import argparse`
`2`	`4`	`import os`
`3`	`5`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
`1`	`2`	`"""`
`2`	`3`	`LM eval harness on model to compare vs HF baseline computed offline.`
`3`	`4`	`Configs are found in configs/$MODEL.yaml`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
	`2`	`+`
`1`	`3`	`import json`
`2`	`4`	`import os`
`3`	`5`	`from pathlib import Path`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
	`2`	`+`
`1`	`3`	`from lmdeploy.serve.openai.api_client import APIClient`
`2`	`4`
`3`	`5`	`api_client = APIClient("http://localhost:8000")`