
Commit 2489551

Fix DP>1 & TP>1 evals with vllm (#841)
* fix vllm evals using dp>1 and tp>1
* Update vllm dependency version to 0.8.5.post1 in pyproject.toml
* should fix case

Co-authored-by: Clémentine Fourrier <[email protected]>
1 parent 99bfd9f commit 2489551

2 files changed: +3 additions, −9 deletions

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -94,7 +94,7 @@ nanotron = [
     "tensorboardX"
 ]
 tensorboardX = ["tensorboardX"]
-vllm = ["vllm>=0.8.4", "ray", "more_itertools"]
+vllm = ["vllm>=0.8.5.post1", "ray", "more_itertools"]
 quality = ["ruff>=v0.11.0","pre-commit"]
 tests = ["pytest>=7.4.0","deepdiff"]
 dev = ["lighteval[accelerate,quality,tests,multilingual,math,extended_tasks,vllm]"]
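To pick up the new minimum vllm version when working from source, re-resolve the vllm extra (the extra name comes from the diff above; the editable-install form shown here is just one common invocation):

    pip install -e ".[vllm]"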

src/lighteval/models/vllm/vllm_model.py

Lines changed: 2 additions & 8 deletions

@@ -399,14 +399,8 @@ def _generate(
         sampling_params.detokenize = False

         if self.data_parallel_size > 1:
-            # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote
-            # also seems to only work with decorator and not with ray.remote() fn
-            # see https://github.com/vllm-project/vllm/issues/973
-            # note: this has changed on 0.3.3, and it only works now if num_gpus are set.
-            # but then tensor_parallel breaks
-            # Hynek: With the newest vllm, it actually breaks when tensor_parallel_size == 1 and num_gpus not set,
-            # as VLLM complains about no GPUs available.
-            @ray.remote(num_gpus=1 if self.tensor_parallel_size == 1 else None)
+
+            @ray.remote(num_gpus=self.tensor_parallel_size)
             def run_inference_one_model(model_args: dict, sampling_params: SamplingParams, requests):
                 llm = LLM(**model_args)
                 return llm.generate(prompt_token_ids=requests, sampling_params=sampling_params)
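For context, here is a minimal, hypothetical sketch (not lighteval's actual code) of the data-parallel pattern this hunk fixes: each Ray task now reserves tensor_parallel_size GPUs, so Ray can place one vLLM engine per data-parallel replica whether or not tensor parallelism is enabled. The data_parallel_generate helper and its parameters are illustrative.

# Hypothetical sketch, not lighteval code: data-parallel generation with one
# vLLM engine per Ray task, each task reserving `tensor_parallel_size` GPUs.
import ray
from more_itertools import distribute
from vllm import LLM, SamplingParams


def data_parallel_generate(model_args: dict, requests, data_parallel_size: int, tensor_parallel_size: int):
    # Reserving num_gpus=tensor_parallel_size lets Ray place each replica on
    # its own set of GPUs. With num_gpus unset, recent vLLM releases abort
    # because the task sees no GPUs assigned (the failure this commit fixes).
    @ray.remote(num_gpus=tensor_parallel_size)
    def run_inference_one_model(model_args: dict, sampling_params: SamplingParams, requests):
        llm = LLM(**model_args)
        return llm.generate(prompt_token_ids=requests, sampling_params=sampling_params)

    sampling_params = SamplingParams(max_tokens=128)
    # Shard the tokenized requests round-robin across the replicas.
    shards = [list(shard) for shard in distribute(data_parallel_size, requests)]
    futures = [run_inference_one_model.remote(model_args, sampling_params, shard) for shard in shards]
    results = ray.get(futures)
    ray.shutdown()
    # Flattened outputs follow the shard order, not the original request order;
    # a real implementation would re-interleave them.
    return [output for shard_outputs in results for output in shard_outputs]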
