Skip to content

Commit f84bcb3

Browse files
jafraustro authored and soumith committed
Add min_gpu verification similar to the tensor_parallel_example.py example
Signed-off-by: jafraustro <[email protected]>
1 parent eab402d commit f84bcb3

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

distributed/ddp/example.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111

1212
from torch.nn.parallel import DistributedDataParallel as DDP
1313

14+
def verify_min_gpu_count(min_gpus: int = 2) -> bool:
15+
""" verification that we have at least 2 gpus to run dist examples """
16+
has_gpu = torch.accelerator.is_available()
17+
gpu_count = torch.accelerator.device_count()
18+
return has_gpu and gpu_count >= min_gpus
19+
1420
class ToyModel(nn.Module):
1521
def __init__(self):
1622
super(ToyModel, self).__init__()
@@ -88,4 +94,8 @@ def main():
8894
dist.destroy_process_group()
8995

9096
if __name__ == "__main__":
    # The DDP example needs at least this many accelerator devices.
    _min_gpu_count = 2
    if verify_min_gpu_count(min_gpus=_min_gpu_count):
        main()
    else:
        # Not enough devices: report it and stop without running the example.
        print(f"Unable to locate sufficient {_min_gpu_count} gpus to run this example. Exiting.")
        sys.exit()

distributed/ddp/run_example.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# /bin/bash
22
# bash run_example.sh {file_to_run.py} {num_gpus}
33
# where file_to_run = example to run. Default = 'example.py'
4-
# num_gpus = num local gpus to use (must be at least 2). Default = 4
4+
# num_gpus = num local gpus to use (must be at least 2). Default = 2
55

66
# samples to run include:
77
# example.py
88

9-
echo "Launching ${1:-example.py} with ${2:-4} gpus"
10-
torchrun --nnodes=1 --nproc_per_node=${2:-4} --rdzv_id=101 --rdzv_endpoint="localhost:5972" ${1:-example.py}
9+
echo "Launching ${1:-example.py} with ${2:-2} gpus"
10+
torchrun --nnodes=1 --nproc_per_node=${2:-2} --rdzv_id=101 --rdzv_endpoint="localhost:5972" ${1:-example.py}

0 commit comments

Comments
 (0)