Skip to content

Commit dc4424a

Browse files
authored
fix: ensure synchronization is not used without distributed execution (#714)
1 parent ef93f0e commit dc4424a

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

lmms_eval/evaluator.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -661,13 +661,15 @@ def evaluate(
661661
else:
662662
results_dict = None
663663

664-
if distributed_executor_backend == "accelerate":
665-
# this should work for torchrun as well since it internally calls torch.distributed.barrier()
666-
Accelerator().wait_for_everyone()
667-
elif distributed_executor_backend == "torchrun":
668-
dist.barrier()
669-
else:
670-
raise ValueError(f"Invalid distributed_executor_backend: {distributed_executor_backend}. Choose either 'accelerate' or 'torchrun'.")
664+
if WORLD_SIZE > 1:
665+
# if multi-gpu, wait for all processes to finish
666+
if distributed_executor_backend == "accelerate":
667+
# this should work for torchrun as well since it internally calls torch.distributed.barrier()
668+
Accelerator().wait_for_everyone()
669+
elif distributed_executor_backend == "torchrun":
670+
dist.barrier()
671+
else:
672+
raise ValueError(f"Invalid distributed_executor_backend: {distributed_executor_backend}. Choose either 'accelerate' or 'torchrun'.")
671673

672674
return results_dict
673675

0 commit comments

Comments
 (0)