59 changes: 59 additions & 0 deletions .github/workflows/latest_tests.yaml
@@ -0,0 +1,59 @@
name: Tests on dev branch of vllm and transformers

on:
# Run automatically every Saturday at 00:00 UTC
schedule:
- cron: "0 0 * * 6"

# Allow manual triggering via GitHub UI
workflow_dispatch:

# Optional: run on pushes to main or release branches
push:
branches:
- main
- v*-release

pull_request:
branches:
- main

jobs:
run_tests:
name: Run tests on dev branch of vllm and transformers
runs-on: 'aws-g4dn-2xlarge-use1-public-80'
steps:
- name: Install Git LFS
run: |
sudo apt-get update && sudo apt-get install -y git-lfs
git lfs install

- name: Install Python development headers
run: sudo apt-get update && sudo apt-get install -y python3.10-dev

- name: Checkout repository
uses: actions/checkout@v4
with:
lfs: true

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true

- name: Install the project
run: |
uv sync --extra dev
VLLM_USE_PRECOMPILED=1 uv pip install --upgrade git+https://github.com/vllm-project/vllm.git@main
uv pip install --upgrade git+https://github.com/huggingface/transformers.git@main

- name: Run nvidia-smi
run: nvidia-smi

- name: Pip freeze
run: uv pip freeze

- name: Run tests
run: |
VLLM_WORKER_MULTIPROC_METHOD=spawn uv run pytest --disable-pytest-warnings --runslow tests/slow_tests/test_vllm_model.py
uv run pytest --disable-pytest-warnings --runslow tests/slow_tests/test_accelerate_model.py
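The test step exports `VLLM_WORKER_MULTIPROC_METHOD=spawn` before calling pytest; vLLM reads this variable to pick the multiprocessing start method for its worker processes, and `spawn` sidesteps the CUDA re-initialization errors that forked workers can hit. A minimal sketch of the same setting applied from Python (it has to run before the vLLM engine starts its workers):

```python
import os

# Must be set before vLLM spawns its worker processes; "spawn" avoids the
# CUDA re-initialization errors that the "fork" start method can trigger.
os.environ.setdefault("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
```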
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -63,7 +63,7 @@ dependencies = [
"GitPython>=3.1.41", # for logging
"datasets>=4.0.0",
"pydantic",
"numpy>=2", # pinned to avoid incompatibilities
"numpy>=2,<2.3", # pinned to avoid incompatibilities
"hf-xet>=1.1.8", # pinned to avoid failing test suite
# Prettiness
"typer",
@@ -98,7 +98,7 @@ nanotron = [
"tensorboardX"
]
tensorboardX = ["tensorboardX"]
vllm = ["vllm>=0.10.0,<0.10.2", "ray", "more_itertools"]
vllm = ["vllm", "ray", "more_itertools"]
sglang = ["sglang"]
quality = ["ruff>=v0.11.0","pre-commit"]
tests = ["pytest>=7.4.0","deepdiff","pip>=25.2"]
5 changes: 3 additions & 2 deletions src/lighteval/models/vllm/vllm_model.py
@@ -48,6 +48,7 @@
import ray
from more_itertools import distribute
from vllm import LLM, RequestOutput, SamplingParams
+ from vllm.inputs.data import TokensPrompt
from vllm.distributed.parallel_state import (
destroy_distributed_environment,
destroy_model_parallel,
@@ -291,7 +292,7 @@ def _create_auto_model(self, config: VLLMModelConfig) -> Optional[LLM]:
# Inferring the max length from the tokenizer makes vllm crash for models with mismatches between the model
# config and the tokenizer config, like mistralai/Mistral-7B-v0.1
if self._max_length is None:
- self._max_length = model.llm_engine.model_config.max_seq_len_to_capture
+ self._max_length = model.llm_engine.model_config.max_model_len

return model
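The fallback for `self._max_length` now comes from `max_model_len`, the context window the engine actually serves, rather than `max_seq_len_to_capture`, which only bounds the sequence lengths eligible for CUDA-graph capture and can be smaller than the real context window. A minimal sketch of reading the value through the same internal attributes the snippet above relies on (the model name is a placeholder):

```python
from vllm import LLM

# Placeholder model, purely for illustration.
llm = LLM(model="HuggingFaceTB/SmolLM2-135M-Instruct")

# The context window the engine will serve; this is what the fallback above
# now assigns to self._max_length.
print(llm.llm_engine.model_config.max_model_len)
```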

@@ -455,7 +456,7 @@ def run_inference_one_model(model_args: dict, sampling_params: SamplingParams, r
]
else:
outputs = self.model.generate(
- prompt_token_ids=inputs,
+ prompts=[TokensPrompt(prompt_token_ids=input) for input in inputs],
sampling_params=sampling_params,
use_tqdm=True,
)