Skip to content

Commit 929bbdb

Browse files
authored
misc model-validation ROCm fixes/improvements (#103)
- rocm: add mamba-ssm workaround, cleanup existing workaround - fix run-commands group title - neuralmagic performance and benchmark: allow config override based on accelerator type - accuracy/model-validation: add run-name - download-install-assets-nm-vllm-ent: install rocm dependencies in addition to constraints - env-test: rocm: disable pytorch tunableop - requirements-rocm: add xformers for mistral models - env-test: expand ROCm env vars - model-validation: improve run name display - accuracy: fix no config found warning - scripts: add invoke-model-validation - download-install-assets-nm-vllm-ent: make linter happy
1 parent dc07ef5 commit 929bbdb

File tree

13 files changed

+266
-85
lines changed

13 files changed

+266
-85
lines changed

.github/actions/download-install-assets-nm-vllm-ent/action.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ runs:
3333
fi
3434
3535
if [[ "${TARGET_DEVICE}" == "rocm" ]]; then
36-
uv pip install ${ASSETS} --constraints neuralmagic/requirements/rocm.txt
36+
uv pip install ${ASSETS} --constraints neuralmagic/requirements/rocm.txt -r neuralmagic/requirements/rocm.txt
3737
elif [[ "${TARGET_DEVICE}" == "cuda" ]]; then
3838
uv pip install ${ASSETS}
3939
else
40-
echo ::warning title=download and install assets::Unknown target device $TARGET_DEVICE, proceeding with install
40+
echo ::warning title=download and install assets::Unknown target device "$TARGET_DEVICE", proceeding with install
4141
uv pip install ${ASSETS}
4242
fi
4343

.github/actions/env-test-nm-vllm-ent/action.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,14 @@ runs:
5151
echo "RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1" | tee -a "$GITHUB_ENV" # ray >= 2.45
5252
echo "HIP_FORCE_DEV_KERNARG=1" | tee -a "$GITHUB_ENV"
5353
echo "VLLM_USE_TRITON_FLASH_ATTN=0" | tee -a "$GITHUB_ENV"
54-
# tp>1 fails with ray
54+
# https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/inference-optimization/workload.html#vllm-performance-optimization
55+
echo "NCCL_MIN_NCHANNELS=112 "| tee -a "$GITHUB_ENV"
56+
echo "TORCH_BLAS_PREFER_HIPBLASLT=1" | tee -a "$GITHUB_ENV"
57+
## https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/inference-optimization/workload.html#pytorch-tunableop
58+
# echo "PYTORCH_TUNABLEOP_TUNING=1" | tee -a "$GITHUB_ENV"
59+
# echo "PYTORCH_TUNABLEOP_ENABLED=1" | tee -a "$GITHUB_ENV"
60+
61+
# tp>1 fails with ray as of 2025-05-01
5562
echo "VLLM_DISTRIBUTED_EXECUTOR_BACKEND=mp" >> "$GITHUB_ENV"
5663
shell: bash
5764

.github/actions/run-commands/action.yml

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,22 @@ runs:
3232
cd ${{ inputs.repo }}
3333
mv vllm vllm-ignore
3434
mv csrc csrc-ignore
35-
echo "::group::Installing requirements via pip"
35+
36+
echo "::group::Installing requirements via uv pip"
3637
if [[ "${TARGET_DEVICE}" == "cuda" ]]; then
37-
# temporarily installing mamba-ssm built from source due to missing numpy package
38+
# install mamba-ssm built from source due to missing prebuilt wheel for torch 2.7, see https://github.com/state-spaces/mamba/pull/720
3839
gsutil cp gs://nm-vllm-certs/caches/whls/mamba_ssm-2.2.4-cp39-abi3-linux_x86_64.whl .
39-
uv pip install mamba_ssm-2.2.4-cp39-abi3-linux_x86_64.whl
40-
uv pip install -r requirements/test.txt
40+
41+
uv pip install -r requirements/test.txt --overrides <(echo $PWD/mamba_ssm-2.2.4-cp39-abi3-linux_x86_64.whl)
4142
elif [[ "${TARGET_DEVICE}" == "rocm" ]]; then
4243
sed -i '/torch/d' requirements/test.in
43-
uv pip compile requirements/test.in -c ../neuralmagic/requirements/rocm.txt -o requirements/test.txt
44+
# workaround for mamba-ssm==2.2.4 build failure on torch==2.7.0, see https://github.com/state-spaces/mamba/issues/720
45+
gcloud storage cp gs://nm-vllm-certs/state-spaces/mamba/assets/15133810912/mamba_ssm-2.2.4-cp312-cp312-linux_x86_64.whl ./
46+
47+
uv pip compile requirements/test.in \
48+
--constraints ../neuralmagic/requirements/rocm.txt \
49+
--overrides <(echo mamba_ssm-2.2.4-cp312-cp312-linux_x86_64.whl) \
50+
--output requirements/test.txt
4451
uv pip install -r requirements/test.txt
4552
else
4653
echo ::error title=run commands::Invalid target_device=${TARGET_DEVICE}

.github/workflows/accuracy.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
name: accuracy
2+
run-name: accuracy ${{inputs.model}} on ${{inputs.label}} (${{github.actor}})
23

34
on:
45
workflow_call:

.github/workflows/model-validation.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
name: model validation
2+
run-name: Model Validation ${{inputs.model}} on ${{inputs.label}} (${{github.actor}})
23

34
on:
45
workflow_call:

neuralmagic/accuracy/run_llm_eval_test.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import argparse
22
import os
3-
import pathlib
43
import shlex
54
import subprocess
65
from pathlib import Path
@@ -10,22 +9,20 @@
109

1110
from neuralmagic.tools.logger import make_logger
1211

13-
from ..performance.vllm_server import Server
12+
from ..utils import Accelerator, get_config_file
13+
from ..vllm_server import Server
1414

1515

16-
def get_server_config(model_name: str) -> Path:
16+
def get_server_config(model_name: str, accelerator: Accelerator) -> Path:
1717
# vllm server options need to be updated to remove "model"
1818
# if it's in there
19-
server_config_file = pathlib.Path(
20-
f"model-validation-configs/{model_name}/accuracy/server.yml"
19+
server_config_file = get_config_file(
20+
"model-validation-configs",
21+
model_name,
22+
config_type="server",
23+
accelerator=accelerator,
24+
workflow_kind="accuracy",
2125
)
22-
if not server_config_file.exists():
23-
print(
24-
f"No server config found for {model_name}, using common/accuracy/server.yml"
25-
)
26-
server_config_file = pathlib.Path(
27-
"model-validation-configs/common/accuracy/server.yml"
28-
)
2926
server_yml = server_config_file.read_text(encoding="utf-8")
3027
server_config = yaml.safe_load(server_yml)
3128
server_config.pop("model", None)
@@ -35,7 +32,7 @@ def get_server_config(model_name: str) -> Path:
3532
return server_config_file.resolve()
3633

3734

38-
if __name__ == "__main__":
35+
def main() -> None:
3936
"""
4037
Starts a vllm server then runs the llm-eval-test tool against that server.
4138
@@ -54,22 +51,19 @@ def get_server_config(model_name: str) -> Path:
5451
model_name: str | None = os.getenv("MODEL_NAME")
5552
if model_name is None:
5653
raise ValueError("MODEL_NAME env var must be defined")
57-
config_file: Path = get_server_config(model_name)
54+
55+
accelerator = Accelerator.from_env()
56+
config_file: Path = get_server_config(model_name, accelerator)
5857
datasets_dir: str = str(Path().cwd() / "datasets")
5958
os.makedirs(datasets_dir, exist_ok=True)
6059

61-
# get client configuration
62-
client_config_file = Path(
63-
f"model-validation-configs/{model_name}/accuracy/client.yml"
60+
client_config_file = get_config_file(
61+
"model-validation-configs",
62+
model_name,
63+
config_type="client",
64+
accelerator=accelerator,
65+
workflow_kind="accuracy",
6466
)
65-
if not client_config_file.exists():
66-
print(
67-
"No client config found for {model_name}, using common/accuracy/client.yml"
68-
)
69-
client_config_file = pathlib.Path(
70-
"model-validation-configs/common/accuracy/client.yml"
71-
)
72-
7367
client_yml = client_config_file.read_text(encoding="utf-8")
7468
client_config = yaml.safe_load(client_yml)
7569

@@ -78,6 +72,10 @@ def get_server_config(model_name: str) -> Path:
7872
f"model-validation-configs/{model_name}/accuracy/tasks.yml"
7973
)
8074
tasks_list: str = "openllm,leaderboard"
75+
if tasks_config_file.exists():
76+
print(
77+
f"::warning title= run_llm_eval.py:: {tasks_config_file=} is currently overridden with {tasks_list=}"
78+
)
8179

8280
logger.info("launching server...")
8381
with Server(model_name, config_file=config_file) as server:
@@ -147,3 +145,7 @@ def get_server_config(model_name: str) -> Path:
147145
raise RuntimeError(
148146
f"llm-eval-test run failed to generate the expected result file {args.output}"
149147
)
148+
149+
150+
if __name__ == "__main__":
151+
main()

neuralmagic/performance/__init__.py

Whitespace-only changes.

neuralmagic/performance/benchmark_model.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@
44
from pathlib import Path
55
from textwrap import indent
66

7-
from neuralmagic.tools.logger import make_logger
8-
9-
from .utils import client_config_to_cli_args, get_config_file
10-
from .vllm_server import Server
7+
from ..tools.logger import make_logger
8+
from ..utils import Accelerator, client_config_to_cli_args, get_config_file
9+
from ..vllm_server import Server
1110

1211

1312
def parse_args() -> argparse.Namespace:
@@ -24,17 +23,21 @@ def parse_args() -> argparse.Namespace:
2423
return parser.parse_args()
2524

2625

27-
if __name__ == "__main__":
26+
def main() -> None:
2827
logger = make_logger("benchmark_model")
28+
2929
args = parse_args()
30+
accelerator = Accelerator.from_env()
3031
server_config_file = get_config_file(
3132
base_config_dir=args.base_config_dir,
3233
model_name=args.model,
34+
accelerator=accelerator,
3335
config_type="server",
3436
)
3537
client_config_file = get_config_file(
3638
base_config_dir=args.base_config_dir,
3739
model_name=args.model,
40+
accelerator=accelerator,
3841
config_type="client",
3942
)
4043
logger.info("launching server...")
@@ -60,3 +63,7 @@ def parse_args() -> argparse.Namespace:
6063
if result.returncode != 0:
6164
message = f"guidellm command failed with returncode={result.returncode}"
6265
raise RuntimeError(message)
66+
67+
68+
if __name__ == "__main__":
69+
main()

neuralmagic/performance/utils.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

neuralmagic/requirements/rocm.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ amdsmi@file:///opt/rocm/share/amd_smi
88

99
sympy>=1.13.3 # temporarily pinned due to incompatibility with the pip-compiled version in requirements/test.txt
1010
ray==2.43.0 # newer ray versions are fubar
11+
xformers==0.0.30 # required for mistralai/Mistral-Small-3.1-24B-Instruct-2503

0 commit comments

Comments (0)