opendatahub-io
diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py
Lines changed: 4 additions & 2 deletions
diff --git a/.buildkite/generate_index.py b/.buildkite/generate_index.py
Lines changed: 2 additions & 0 deletions
diff --git a/.buildkite/lm-eval-harness/configs/SparseLlama3.1_2of4_fp8_compressed.yaml b/.buildkite/lm-eval-harness/configs/SparseLlama3.1_2of4_fp8_compressed.yaml
Lines changed: 11 additions & 0 deletions
diff --git a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
Lines changed: 1 addition & 0 deletions
diff --git a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
Lines changed: 2 additions & 0 deletions
diff --git a/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py b/.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
Lines changed: 2 additions & 0 deletions
diff --git a/.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py b/.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py
Lines changed: 2 additions & 0 deletions
diff --git a/.buildkite/nightly-benchmarks/scripts/get-lmdeploy-modelname.py b/.buildkite/nightly-benchmarks/scripts/get-lmdeploy-modelname.py
Lines changed: 2 additions & 0 deletions
diff --git a/.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py b/.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py
Lines changed: 2 additions & 0 deletions
diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh
Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,14 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import os
 import sys
 import zipfile
 
-# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 300 MiB
+# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
 # Note that we have 400 MiB quota, please use it wisely.
 # See https://github.com/pypi/support/issues/3792 .
 # Please also sync the value with the one in Dockerfile.
-VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 300))
+VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
 
 
 def print_top_10_largest_files(zip_file):
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 import os
 
 
@@ -0,0 +1,11 @@
+# bash ./run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/SparseLlama-3.1-8B-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM -b "auto" -t 2
+model_name: "nm-testing/SparseLlama-3.1-8B-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.6353
+  - name: "exact_match,flexible-extract"
+    value: 0.637
+limit: null
+num_fewshot: null 
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """
 LM eval harness on model to compare vs HF baseline computed offline.
 Configs are found in configs/$MODEL.yaml
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import json
 import os
 from pathlib import Path
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 
 from transformers import AutoTokenizer
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 import json
 from pathlib import Path
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 from lmdeploy.serve.openai.api_client import APIClient
 
 api_client = APIClient("http://localhost:8000")
 
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
 import datetime
 import json
 import os
 
@@ -23,6 +23,6 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and test offline inference
-docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
-    python3 examples/offline_inference/basic.py
+docker run -e HF_TOKEN -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
+    python3 examples/offline_inference/cli.py --model meta-llama/Llama-3.2-1B
 '
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
	`2`	`+`
`1`	`3`	`import argparse`
`2`	`4`	`import os`
`3`	`5`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
`1`	`2`	`"""`
`2`	`3`	`LM eval harness on model to compare vs HF baseline computed offline.`
`3`	`4`	`Configs are found in configs/$MODEL.yaml`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
	`2`	`+`
`1`	`3`	`import json`
`2`	`4`	`import os`
`3`	`5`	`from pathlib import Path`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
	`2`	`+`
`1`	`3`	`from lmdeploy.serve.openai.api_client import APIClient`
`2`	`4`
`3`	`5`	`api_client = APIClient("http://localhost:8000")`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
	`2`	`+`
`1`	`3`	`import datetime`
`2`	`4`	`import json`
`3`	`5`	`import os`
Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,6 @@ trap remove_docker_container EXIT`
`23`	`23`	`remove_docker_container`
`24`	`24`
`25`	`25`	`# Run the image and test offline inference`
`26`		`-docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '`
`27`		`- python3 examples/offline_inference/basic.py`
	`26`	`+docker run -e HF_TOKEN -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '`
	`27`	`+ python3 examples/offline_inference/cli.py --model meta-llama/Llama-3.2-1B`
`28`	`28`	`'`