Commit 8774767

Author: Hossein Kavianihamedani (committed)
Remove _init_dist() method from BaseForgeActor after upstream removal
- Removed _init_dist() method and its call from BaseForgeActor.__init__()
- This method was removed in upstream PR #561
- Distributed initialization is now handled by the provisioner
- Fixed linting issues: removed unused 'os' import, combined __init__ docstring with class docstring
- Keeps SFT_Notebook branch compatible with latest upstream changes
1 parent ec7d352 commit 8774767

File tree

1 file changed: +3 −30 lines changed


apps/sft/actor.py

Lines changed: 3 additions & 30 deletions
@@ -13,7 +13,6 @@
 
 import logging
 import math
-import os
 from abc import ABC, abstractmethod
 from typing import Any, Optional
 
@@ -45,6 +44,9 @@ class BaseForgeActor(ForgeActor, ForgeEngine, ABC):
 
     This class handles common initialization, distributed setup, and provides
     abstract methods that must be implemented by concrete actor classes.
+
+    Args:
+        config: Configuration dictionary containing job settings
     """
 
     job_config: ForgeJobConfig
@@ -60,12 +62,6 @@ class BaseForgeActor(ForgeActor, ForgeEngine, ABC):
     device: torch.device
 
     def __init__(self, config: DictConfig):
-        """
-        Initialize the base actor with configuration.
-
-        Args:
-            config: Configuration dictionary containing job settings
-        """
         job_config = ForgeJobConfig().to_dict()
         job_config = OmegaConf.merge(job_config, config)
 
@@ -75,31 +71,8 @@ def __init__(self, config: DictConfig):
         self._rank = current_rank().rank
         self._size = math.prod(current_size().values())
 
-        self._init_dist()
         super().__init__(job_config)
 
-    def _init_dist(self):
-        """
-        Initialize torch distributed environment.
-
-        Sets up environment variables required for distributed training
-        in the Monarch actor framework.
-        """
-        env = {
-            "RANK": str(self._rank),
-            "LOCAL_RANK": str(self._rank),
-            "LOCAL_WORLD_SIZE": str(self._size),
-            "GROUP_RANK": str(self._size),
-            "GROUP_WORLD_SIZE": str(self._size),
-            "ROLE_RANK": str(self._rank),
-            "ROLE_WORLD_SIZE": str(self._size),
-            "ROLE_NAME": "rank",
-            "WORLD_SIZE": str(self._size),
-            "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
-        }
-        os.environ.update(env)
-        logger.info(f"Initialized distributed environment: {env}")
-
     @abstractmethod
     async def setup(self):
         """
