Skip to content

Commit 2a1e9b6

Browse files
authored
Merge pull request #516 from booxter/pull-req
Implicit distributed backend selection
2 parents ef49f97 + 4be7e28 commit 2a1e9b6

File tree

2 files changed: 3 additions (+3) and 7 deletions (-7)

requirements.txt

Lines changed: 1 addition & 3 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,9 +2,7 @@ packaging>=20.9
 wheel>=0.43
 pyyaml
 py-cpuinfo
-# we set this to be above 0a0 so that it doesn't
-# replace custom pytorch images with the 2.3.0
-torch>=2.3.0a0
+torch>=2.6.0
 transformers>=4.45.2
 
 datasets>=2.15.0

src/instructlab/training/main_ds.py

Lines changed: 2 additions & 4 deletions
--- a/src/instructlab/training/main_ds.py
+++ b/src/instructlab/training/main_ds.py
@@ -4,7 +4,6 @@
 from copy import deepcopy
 import argparse
 import datetime
-import functools
 import logging
 import os
 import subprocess
@@ -328,11 +327,10 @@ def main(args):
     args.local_rank = int(os.environ["LOCAL_RANK"])
 
     timeout = _get_collective_timeout()
-    init = functools.partial(torch.distributed.init_process_group, "nccl")
     if timeout is not None:
-        init(timeout=timeout)
+        torch.distributed.init_process_group(timeout=timeout)
     else:
-        init()
+        torch.distributed.init_process_group()
 
     args.global_rank = torch.distributed.get_rank()
     tensor = torch.ByteTensor([False]).cuda()

0 commit comments

Comments
 (0)