Skip to content

Commit 3222c73

Browse files
Add HPU model runner & HPU input batch unit tests (#44)
Signed-off-by: Konrad Zawora <[email protected]>
1 parent dfcbfb7 commit 3222c73

File tree

9 files changed

+1142
-9
lines changed

9 files changed

+1142
-9
lines changed

.github/workflows/pre-merge.yaml

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ concurrency:
1212
cancel-in-progress: true
1313

1414
jobs:
15-
pre_merge_hpu_test:
15+
pre_merge_hpu_test_build:
1616
runs-on: ucb-vllm-cicd-g2
1717
permissions:
1818
contents: read # Required to checkout code and read history
@@ -73,15 +73,42 @@ jobs:
7373
7474
EOF
7575
echo "Docker image built successfully."
76+
hpu_unit_tests:
77+
runs-on: ucb-vllm-cicd-g2
78+
needs: pre_merge_hpu_test_build
79+
steps:
80+
- name: Run pytest in tests/unit_tests
81+
run: |
82+
EXITCODE=1
83+
remove_docker_containers() { docker rm -f hpu-plugin-v1-test-unit-tests || true; }
84+
trap 'remove_docker_containers; exit $EXITCODE;' EXIT
85+
remove_docker_containers
7686
87+
echo "Running HPU plugin v1 unit tests"
88+
docker run --rm --runtime=habana --name=hpu-plugin-v1-test-unit-tests --network=host \
89+
-e HABANA_VISIBLE_DEVICES=all \
90+
-e HF_HOME=/workspace/hf_cache \
91+
-v /mnt/hf_cache:/workspace/hf_cache \
92+
hpu-plugin-v1-test-env-pre-merge \
93+
/bin/bash -c "pytest -vvv /workspace/vllm-gaudi/tests/unit_tests"
94+
95+
EXITCODE=$?
96+
echo "Test script exited with code: $EXITCODE"
97+
98+
hpu_e2e_tests:
99+
runs-on: ucb-vllm-cicd-g2
100+
# This is a final job that runs after the build and unit tests
101+
# Unit tests are supposed to fail fast if anything goes wrong, removing the need for this job to run.
102+
needs: [pre_merge_hpu_test_build, hpu_unit_tests]
103+
steps:
77104
- name: Run test scripts
78105
run: |
79106
EXITCODE=1
80-
remove_docker_containers() { docker rm -f hpu-plugin-v1-test-pre-merge || true; }
107+
# Cleanup must target the same name that `docker run --name=...` uses below;
# otherwise a container left behind by a cancelled run is never removed and
# the next `docker run` fails with a container-name conflict.
remove_docker_containers() { docker rm -f hpu-plugin-v1-test-pre-merge || true; }
81108
trap 'remove_docker_containers; exit $EXITCODE;' EXIT
82109
remove_docker_containers
83110
84-
echo "Running HPU plugin v1 test"
111+
echo "Running HPU plugin v1 e2e tests"
85112
docker run --rm --runtime=habana --name=hpu-plugin-v1-test-pre-merge --network=host \
86113
-e HABANA_VISIBLE_DEVICES=all \
87114
-e HF_HOME=/workspace/hf_cache \
@@ -91,3 +118,11 @@ jobs:
91118
92119
EXITCODE=$?
93120
echo "Test script exited with code: $EXITCODE"
121+
122+
pre_merge_hpu_test:
  runs-on: ucb-vllm-cicd-g2
  needs: [hpu_unit_tests, hpu_e2e_tests]
  # This job is the single check required to pass for pre-merge CI.
  # Without `if: always()`, a failure in any job listed in "needs" causes this
  # job to be *skipped*, and a skipped required check is reported as passing.
  # Run unconditionally and fail explicitly unless every needed job succeeded.
  if: ${{ always() }}
  steps:
    - name: Succeeded if all previous jobs passed
      env:
        UNIT_TESTS_RESULT: ${{ needs.hpu_unit_tests.result }}
        E2E_TESTS_RESULT: ${{ needs.hpu_e2e_tests.result }}
      run: |
        echo "hpu_unit_tests result: $UNIT_TESTS_RESULT"
        echo "hpu_e2e_tests result: $E2E_TESTS_RESULT"
        # Any result other than "success" (failure, cancelled, skipped) must fail the gate.
        if [ "$UNIT_TESTS_RESULT" != "success" ] || [ "$E2E_TESTS_RESULT" != "success" ]; then
          echo "At least one required job did not succeed."
          exit 1
        fi
        echo "All previous jobs passed."

tests/unit_tests/conftest.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from vllm.distributed import (cleanup_dist_env_and_memory,
2+
init_distributed_environment,
3+
initialize_model_parallel)
4+
import pytest
5+
import tempfile
6+
7+
8+
@pytest.fixture
def dist_init():
    """Set up a single-process distributed environment for one test.

    Calls ``init_distributed_environment`` with ``world_size=1``, ``rank=0``,
    ``local_rank=0`` and the ``"hccl"`` backend, using a fresh temporary file
    as the ``file://`` init method, then calls
    ``initialize_model_parallel(1, 1)``. After the test finishes,
    ``cleanup_dist_env_and_memory()`` is called and the temporary file is
    removed.
    """
    # Local import so this fixture does not depend on a module-level `os` import.
    import os

    # mkstemp() returns an *open* OS-level file descriptor together with the
    # path. Only the path is needed, so close the descriptor right away
    # instead of leaking one per test.
    fd, temp_file = tempfile.mkstemp()
    os.close(fd)
    try:
        init_distributed_environment(
            world_size=1,
            rank=0,
            distributed_init_method=f"file://{temp_file}",
            local_rank=0,
            backend="hccl",
        )
        initialize_model_parallel(1, 1)
        yield
        cleanup_dist_env_and_memory()
    finally:
        # Best-effort removal of the init file: it may already be gone
        # (e.g. deleted by the distributed backend), which is not an error.
        try:
            os.remove(temp_file)
        except OSError:
            pass

tests/unit_tests/worker/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)