
Commit 4036555

support distributed executor backend - torchrun (#680)
* support dist executor - torchrun
1 parent cd1d194 commit 4036555
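
The commit threads a new `distributed_executor_backend` keyword through `simple_evaluate` and `evaluate`, so multi-process runs launched with `torchrun` synchronize through `torch.distributed` directly instead of the `accelerate` wrapper. A minimal sketch of the updated call, assuming placeholder model and task names and a hypothetical launch script (the keyword and its default are from this commit; nothing else here is):

    from lmms_eval.evaluator import simple_evaluate

    # Each rank runs this script, launched e.g. via:
    #   torchrun --nproc_per_node=8 run_eval.py   (hypothetical script name)
    results = simple_evaluate(
        model="llava",    # placeholder model name
        tasks=["mme"],    # placeholder task list
        distributed_executor_backend="torchrun",  # new; defaults to "accelerate"
    )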

File tree

1 file changed: +31 -5 lines changed

lmms_eval/evaluator.py

Lines changed: 31 additions & 5 deletions
@@ -77,6 +77,7 @@ def simple_evaluate(
     torch_random_seed: int = 1234,
     fewshot_random_seed: int = 1234,
     datetime_str: str = get_datetime_str(),
+    distributed_executor_backend: str = "accelerate",
     cli_args=None,
 ):
     """Instantiate and evaluate a model on a list of tasks.
@@ -133,7 +134,8 @@ def simple_evaluate(
         Random seed for torch. If set to None, the seed will not be set.
     :param fewshot_random_seed: int
         Random seed for fewshot sampler random generator. If set to None, the seed of generator will be set to None.
-
+    :param distributed_executor_backend: str
+        The backend to use for distributed execution, `accelerate` or `torchrun`. Defaults to "accelerate" for the `accelerate` library.
     :return
         Dictionary of results
     """
@@ -156,6 +158,8 @@ def simple_evaluate(
 
     assert tasks != [], "No tasks specified, or no tasks found. Please verify the task names."
 
+    assert distributed_executor_backend in {"accelerate", "torchrun"}, f"Invalid distributed executor backend: {distributed_executor_backend}. Choose either 'accelerate' or 'torchrun'."
+
     if gen_kwargs:
         gen_kwargs = simple_parse_args_string(gen_kwargs)
         eval_logger.warning(f"generation_kwargs specified through cli, these settings will be used over set parameters in yaml tasks.")
@@ -258,6 +262,7 @@ def _adjust_config(task_dict):
         apply_chat_template=apply_chat_template,
         fewshot_as_multiturn=fewshot_as_multiturn,
         verbosity=verbosity,
+        distributed_executor_backend=distributed_executor_backend,
         cli_args=cli_args,
     )
 
@@ -319,6 +324,7 @@ def evaluate(
     apply_chat_template: bool = False,
     fewshot_as_multiturn: bool = False,
     verbosity: str = "INFO",
+    distributed_executor_backend: str = "accelerate",
     cli_args=None,
 ):
     """Instantiate and evaluate a model on a list of tasks.
@@ -341,6 +347,8 @@ def evaluate(
         If True, apply chat template to the prompt
     :param fewshot_as_multiturn: bool
         Whether to provide the fewshot examples as a multiturn conversation or a single user turn.
+    :param distributed_executor_backend: str
+        The backend to use for distributed execution, `accelerate` or `torchrun`. Defaults to "accelerate" for the `accelerate` library.
     :return
         Dictionary of results
     """
@@ -432,8 +440,17 @@ def evaluate(
             requests[reqtype].append(instance)
 
         if lm.world_size > 1:
-            instances_rnk = torch.tensor(len(task._instances), device=lm.device)
-            gathered_item = lm.accelerator.gather(instances_rnk).cpu().detach().numpy().tolist()
+            if distributed_executor_backend == "accelerate":
+                instances_rnk = torch.tensor(len(task._instances), device=lm.device)
+                gathered_item = lm.accelerator.gather(instances_rnk).cpu().detach().numpy().tolist()
+            elif distributed_executor_backend == "torchrun":
+                instances_rnk = torch.tensor(len(task._instances), device=lm.device)
+                gathered_item = torch.zeros(lm.world_size * 1, dtype=instances_rnk.dtype, device=lm.device)
+                dist.all_gather_into_tensor(gathered_item, instances_rnk)
+                gathered_item = gathered_item.cpu().detach().numpy().tolist()
+            else:
+                raise ValueError(f"Invalid distributed_executor_backend: {distributed_executor_backend}. Choose either 'accelerate' or 'torchrun'.")
+
             # "multiple_choice" task types dispatch (several) "loglikelihood" request types
             reqtype = "loglikelihood" if task.OUTPUT_TYPE == "multiple_choice" else task.OUTPUT_TYPE
             # compute number of pseudo-batches to pad with (FSDP/DDP require even batches among ranks)
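
In the torchrun branch above, each rank's instance count is gathered into a pre-allocated flat tensor with `dist.all_gather_into_tensor` (hence the `world_size * 1` output size), replacing `accelerator.gather`. A self-contained sketch of the same pattern, runnable on CPU with the gloo backend in recent PyTorch (script name and counts are illustrative, not from the commit):

    import torch
    import torch.distributed as dist

    # Run with: torchrun --nproc_per_node=2 gather_counts.py  (hypothetical file name)
    def main():
        dist.init_process_group(backend="gloo")  # the evaluator would typically use NCCL on GPUs
        world_size = dist.get_world_size()
        rank = dist.get_rank()

        # Stand-in for len(task._instances); counts may differ across ranks.
        local_count = torch.tensor([10 + rank])
        # all_gather_into_tensor requires output numel == world_size * input numel,
        # which is why the commit allocates torch.zeros(lm.world_size * 1, ...).
        gathered = torch.zeros(world_size, dtype=local_count.dtype)
        dist.all_gather_into_tensor(gathered, local_count)

        counts = gathered.tolist()
        # Mirrors the padding step that follows in evaluate(): ranks with fewer
        # instances pad with pseudo-batches up to the max so batches stay even.
        numpad = max(counts) - counts[rank]
        print(f"rank {rank}: counts={counts}, pad={numpad}")

        dist.destroy_process_group()

    if __name__ == "__main__":
        main()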
@@ -462,7 +479,12 @@ def evaluate(
             req.resps.append(x)
 
         if lm.world_size > 1:
-            lm.accelerator.wait_for_everyone()
+            if distributed_executor_backend == "accelerate":
+                lm.accelerator.wait_for_everyone()
+            elif distributed_executor_backend == "torchrun":
+                dist.barrier()
+            else:
+                raise ValueError(f"Invalid distributed_executor_backend: {distributed_executor_backend}. Choose either 'accelerate' or 'torchrun'.")
 
     RANK = lm.rank
     WORLD_SIZE = lm.world_size
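
Both branches provide the same rendezvous guarantee: `accelerate`'s `wait_for_everyone()` is itself a barrier over the underlying process group, and the torchrun branch calls `torch.distributed.barrier()` directly. A toy illustration of that guarantee (script name hypothetical, gloo backend assumed for a CPU demo):

    import torch.distributed as dist

    # Run with: torchrun --nproc_per_node=2 sync_demo.py  (hypothetical file name)
    dist.init_process_group(backend="gloo")

    if dist.get_rank() == 0:
        # Rank-local work, analogous to rank 0 holding results in the evaluator.
        print("rank 0: doing work the other ranks must wait for")
    # No rank proceeds past the barrier until every rank has reached it,
    # matching accelerator.wait_for_everyone() in the accelerate branch.
    dist.barrier()
    print(f"rank {dist.get_rank()}: past the barrier")

    dist.destroy_process_group()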
@@ -638,8 +660,12 @@ def evaluate(
     else:
         results_dict = None
 
-    if hasattr(lm, "accelerator"):
+    if hasattr(lm, "accelerator") and distributed_executor_backend == "accelerate":
         lm.accelerator.wait_for_everyone()
+    elif distributed_executor_backend == "torchrun":
+        dist.barrier()
+    else:
+        raise ValueError(f"Invalid distributed_executor_backend: {distributed_executor_backend}. Choose either 'accelerate' or 'torchrun'.")
 
     return results_dict
 