Skip to content

Commit 4294c89

Browse files
committed
undo changes
1 parent 07f9433 commit 4294c89

File tree

4 files changed

+9
-279
lines changed

4 files changed

+9
-279
lines changed

src/lightning/fabric/plugins/environments/lightning.py

Lines changed: 6 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,11 @@
1313
# limitations under the License.
1414

1515
import os
16+
import socket
1617

1718
from typing_extensions import override
1819

1920
from lightning.fabric.plugins.environments.cluster_environment import ClusterEnvironment
20-
from lightning.fabric.utilities.port_manager import get_port_manager
2121
from lightning.fabric.utilities.rank_zero import rank_zero_only
2222

2323

@@ -104,38 +104,18 @@ def teardown(self) -> None:
104104
if "WORLD_SIZE" in os.environ:
105105
del os.environ["WORLD_SIZE"]
106106

107-
if self._main_port != -1:
108-
get_port_manager().release_port(self._main_port)
109-
self._main_port = -1
110-
111-
os.environ.pop("MASTER_PORT", None)
112-
os.environ.pop("MASTER_ADDR", None)
113-
114107

115108
def find_free_network_port() -> int:
116109
"""Finds a free port on localhost.
117110
118111
It is useful in single-node training when we don't want to connect to a real main node but have to set the
119112
`MASTER_PORT` environment variable.
120113
121-
The allocated port is reserved and won't be returned by subsequent calls until it's explicitly released.
122-
123-
Returns:
124-
A port number that is reserved and free at the time of allocation
125-
126114
"""
127115
# If an external launcher already specified a MASTER_PORT (for example, torch.distributed.spawn or
128116
# multiprocessing helpers), reserve it through the port manager so no other test reuses the same number.
129-
if "MASTER_PORT" in os.environ:
130-
master_port_str = os.environ["MASTER_PORT"]
131-
try:
132-
existing_port = int(master_port_str)
133-
except ValueError:
134-
pass
135-
else:
136-
port_manager = get_port_manager()
137-
if port_manager.reserve_existing_port(existing_port):
138-
return existing_port
139-
140-
port_manager = get_port_manager()
141-
return port_manager.allocate_port()
117+
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
118+
s.bind(("", 0))
119+
port = s.getsockname()[1]
120+
s.close()
121+
return port

src/lightning/fabric/utilities/port_manager.py

Lines changed: 0 additions & 209 deletions
This file was deleted.

tests/tests_fabric/conftest.py

Lines changed: 1 addition & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
import contextlib
1514
import os
1615
import sys
1716
import threading
@@ -78,30 +77,10 @@ def restore_env_variables():
7877
@pytest.fixture(autouse=True)
7978
def teardown_process_group():
8079
"""Ensures that the distributed process group gets closed before the next test runs."""
81-
from lightning.fabric.utilities.port_manager import get_port_manager
82-
83-
# Record the port used in this test (if any)
84-
port_to_release = None
85-
if "MASTER_PORT" in os.environ:
86-
with contextlib.suppress(ValueError, KeyError):
87-
port_to_release = int(os.environ["MASTER_PORT"])
88-
8980
yield
90-
91-
# Clean up distributed connection
9281
_destroy_dist_connection()
9382

94-
manager = get_port_manager()
95-
96-
# Release the port from the manager so it can be reused
97-
if port_to_release is not None:
98-
manager.release_port(port_to_release)
99-
100-
# If the process group updated MASTER_PORT, reserve and clear it to avoid leaking between tests
101-
if "MASTER_PORT" in os.environ:
102-
with contextlib.suppress(ValueError):
103-
manager.reserve_existing_port(int(os.environ["MASTER_PORT"]))
104-
os.environ.pop("MASTER_PORT", None)
83+
os.environ.pop("MASTER_PORT", None)
10584

10685

10786
@pytest.fixture(autouse=True)

tests/tests_pytorch/conftest.py

Lines changed: 2 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
import contextlib
1514
import os
1615
import signal
1716
import sys
@@ -128,30 +127,11 @@ def restore_signal_handlers():
128127
@pytest.fixture(autouse=True)
129128
def teardown_process_group():
130129
"""Ensures that the distributed process group gets closed before the next test runs."""
131-
from lightning.fabric.utilities.port_manager import get_port_manager
132-
133-
# Record the port used in this test (if any)
134-
port_to_release = None
135-
if "MASTER_PORT" in os.environ:
136-
with contextlib.suppress(ValueError, KeyError):
137-
port_to_release = int(os.environ["MASTER_PORT"])
138-
139130
yield
140-
141-
# Clean up distributed connection
142131
_destroy_dist_connection()
143132

144-
manager = get_port_manager()
145-
146-
# Release the port from the manager so it can be reused
147-
if port_to_release is not None:
148-
manager.release_port(port_to_release)
149-
150-
# If the process group updated MASTER_PORT, reserve and clear it to avoid leaking between tests
151-
if "MASTER_PORT" in os.environ:
152-
with contextlib.suppress(ValueError):
153-
manager.reserve_existing_port(int(os.environ["MASTER_PORT"]))
154-
os.environ.pop("MASTER_PORT", None)
133+
# clean MASTER_PORT environment variable, to avoid leaking between tests
134+
os.environ.pop("MASTER_PORT", None)
155135

156136

157137
@pytest.fixture(autouse=True)

0 commit comments

Comments
 (0)