16
16
import socket
17
17
from typing import Dict , List
18
18
19
+ from typing_extensions import override
20
+
19
21
from lightning .fabric .plugins .environments .cluster_environment import ClusterEnvironment
20
22
from lightning .fabric .utilities .cloud_io import get_filesystem
21
23
@@ -62,27 +64,32 @@ def _set_init_progress_group_env_vars(self) -> None:
62
64
log .debug (f"MASTER_PORT: { os .environ ['MASTER_PORT' ]} " )
63
65
64
66
@property
@override
def creates_processes_externally(self) -> bool:
    """Report that process creation is handled by LSF rather than by PyTorch Lightning.

    Returns:
        Always ``True``: LSF creates the subprocesses, so PyTorch Lightning does not need to spawn them.

    """
    externally_managed = True
    return externally_managed
68
71
69
72
@property
@override
def main_address(self) -> str:
    """Return the main address stored on this environment.

    The stored value is read from an OpenMPI host rank file referenced by the environment variable
    ``LSB_DJOB_RANKFILE``; this accessor only hands back the ``_main_address`` attribute.

    """
    address = self._main_address
    return address
74
78
75
79
@property
@override
def main_port(self) -> int:
    """Return the main port stored on this environment.

    The stored value is calculated from the LSF job ID; this accessor only hands back the ``_main_port``
    attribute.

    """
    port = self._main_port
    return port
79
84
80
85
@staticmethod
@override
def detect() -> bool:
    """Tell whether the current process was launched using the ``jsrun`` command.

    Returns:
        ``True`` only when all of ``LSB_JOBID``, ``LSB_DJOB_RANKFILE``, ``JSM_NAMESPACE_LOCAL_RANK`` and
        ``JSM_NAMESPACE_SIZE`` are present in the process environment.

    """
    expected_names = ("LSB_JOBID", "LSB_DJOB_RANKFILE", "JSM_NAMESPACE_LOCAL_RANK", "JSM_NAMESPACE_SIZE")
    # Every variable has to be present; a single missing one means this is not an LSF/jsrun launch.
    return all(name in os.environ for name in expected_names)
85
91
92
+ @override
86
93
def world_size (self ) -> int :
87
94
"""The world size is read from the environment variable ``JSM_NAMESPACE_SIZE``."""
88
95
world_size = os .environ .get ("JSM_NAMESPACE_SIZE" )
@@ -93,9 +100,11 @@ def world_size(self) -> int:
93
100
)
94
101
return int (world_size )
95
102
103
@override
def set_world_size(self, size: int) -> None:
    """Accept a world-size request without applying it.

    The call is only recorded with a debug log entry; ``size`` is not used.

    """
    notice = "LSFEnvironment.set_world_size was called, but setting world size is not allowed. Ignored."
    log.debug(notice)
98
106
107
+ @override
99
108
def global_rank (self ) -> int :
100
109
"""The world size is read from the environment variable ``JSM_NAMESPACE_RANK``."""
101
110
global_rank = os .environ .get ("JSM_NAMESPACE_RANK" )
@@ -106,9 +115,11 @@ def global_rank(self) -> int:
106
115
)
107
116
return int (global_rank )
108
117
118
@override
def set_global_rank(self, rank: int) -> None:
    """Accept a global-rank request without applying it.

    The call is only recorded with a debug log entry; ``rank`` is not used.

    """
    notice = "LSFEnvironment.set_global_rank was called, but setting global rank is not allowed. Ignored."
    log.debug(notice)
111
121
122
+ @override
112
123
def local_rank (self ) -> int :
113
124
"""The local rank is read from the environment variable `JSM_NAMESPACE_LOCAL_RANK`."""
114
125
local_rank = os .environ .get ("JSM_NAMESPACE_LOCAL_RANK" )
@@ -119,6 +130,7 @@ def local_rank(self) -> int:
119
130
)
120
131
return int (local_rank )
121
132
133
+ @override
122
134
def node_rank (self ) -> int :
123
135
"""The node rank is determined by the position of the current hostname in the OpenMPI host rank file stored in
124
136
``LSB_DJOB_RANKFILE``."""
0 commit comments